From d2fc7c7001ed3babcd0106d15dd70224abfb6f29 Mon Sep 17 00:00:00 2001
From: dim <dim@FreeBSD.org>
Date: Sun, 5 Jul 2015 14:21:36 +0000
Subject: Vendor import of llvm trunk r241361:
 https://llvm.org/svn/llvm-project/llvm/trunk@241361

---
 .gitignore                                         |    4 +-
 CMakeLists.txt                                     |    2 +-
 CODE_OWNERS.TXT                                    |    4 +
 CREDITS.TXT                                        |    3 +-
 Makefile.config.in                                 |   13 +-
 autoconf/configure.ac                              |   18 +-
 .../ocaml/executionengine/llvm_executionengine.ml  |   10 +-
 cmake/config-ix.cmake                              |   10 +-
 cmake/modules/AddLLVM.cmake                        |   24 +-
 cmake/modules/HandleLLVMOptions.cmake              |    4 +-
 cmake/modules/Makefile                             |   43 +-
 configure                                          |   29 +-
 docs/AMDGPUUsage.rst                               |   83 +
 docs/AliasAnalysis.rst                             |    4 +-
 docs/CMake.rst                                     |    4 +-
 docs/CodeGenerator.rst                             |    4 +-
 docs/CommandGuide/llvm-dwarfdump.rst               |    4 +-
 docs/FaultMaps.rst                                 |   75 +-
 docs/GettingStarted.rst                            |    6 +-
 docs/LangRef.rst                                   |   16 +-
 docs/Phabricator.rst                               |   16 +-
 examples/Kaleidoscope/Orc/fully_lazy/toy.cpp       |    5 +-
 examples/Kaleidoscope/Orc/initial/toy.cpp          |    9 +-
 examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp     |   11 +-
 examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp       |    5 +-
 include/llvm-c/lto.h                               |   36 +-
 include/llvm/ADT/APInt.h                           |   28 +-
 include/llvm/ADT/APSInt.h                          |    9 +
 include/llvm/ADT/ArrayRef.h                        |    7 +-
 include/llvm/ADT/BitVector.h                       |    2 +-
 include/llvm/ADT/DenseMap.h                        |    2 +-
 include/llvm/ADT/DenseMapInfo.h                    |   27 +
 include/llvm/ADT/DenseSet.h                        |    2 +-
 include/llvm/ADT/DepthFirstIterator.h              |    2 +-
 include/llvm/ADT/EquivalenceClasses.h              |    2 +-
 include/llvm/ADT/GraphTraits.h                     |    2 +-
 include/llvm/ADT/IndexedMap.h                      |    2 +-
 include/llvm/ADT/IntEqClasses.h                    |    2 +-
 include/llvm/ADT/Optional.h                        |    2 +-
 include/llvm/ADT/PointerUnion.h                    |    2 +-
 include/llvm/ADT/PostOrderIterator.h               |    2 +-
 include/llvm/ADT/PriorityQueue.h                   |    2 +-
 include/llvm/ADT/SCCIterator.h                     |    2 +-
 include/llvm/ADT/STLExtras.h                       |    2 +-
 include/llvm/ADT/SetOperations.h                   |    2 +-
 include/llvm/ADT/SetVector.h                       |    2 +-
 include/llvm/ADT/SmallBitVector.h                  |    2 +-
 include/llvm/ADT/SmallPtrSet.h                     |    2 +-
 include/llvm/ADT/SmallString.h                     |    2 +-
 include/llvm/ADT/SmallVector.h                     |    4 +-
 include/llvm/ADT/Statistic.h                       |    2 +-
 include/llvm/ADT/StringExtras.h                    |    2 +-
 include/llvm/ADT/StringMap.h                       |    2 +-
 include/llvm/ADT/StringRef.h                       |    2 +-
 include/llvm/ADT/StringSet.h                       |    2 +-
 include/llvm/ADT/Triple.h                          |    6 +-
 include/llvm/ADT/Twine.h                           |    2 +-
 include/llvm/ADT/edit_distance.h                   |    2 +-
 include/llvm/ADT/ilist.h                           |    2 +-
 include/llvm/ADT/ilist_node.h                      |    2 +-
 include/llvm/ADT/iterator.h                        |    4 +-
 include/llvm/ADT/iterator_range.h                  |    2 +-
 include/llvm/Analysis/AliasAnalysis.h              |   46 +-
 include/llvm/Analysis/AliasSetTracker.h            |   48 +-
 include/llvm/Analysis/BlockFrequencyInfo.h         |    2 +-
 include/llvm/Analysis/BlockFrequencyInfoImpl.h     |    4 +-
 include/llvm/Analysis/BranchProbabilityInfo.h      |    2 +-
 include/llvm/Analysis/CFG.h                        |   13 +-
 include/llvm/Analysis/CFGPrinter.h                 |    2 +-
 include/llvm/Analysis/CGSCCPassManager.h           |    2 +-
 include/llvm/Analysis/CallGraph.h                  |    2 +-
 include/llvm/Analysis/CallGraphSCCPass.h           |    2 +-
 include/llvm/Analysis/CodeMetrics.h                |    2 +-
 include/llvm/Analysis/ConstantFolding.h            |    2 +-
 include/llvm/Analysis/DomPrinter.h                 |    2 +-
 include/llvm/Analysis/DominanceFrontier.h          |    2 +-
 include/llvm/Analysis/DominanceFrontierImpl.h      |    2 +-
 include/llvm/Analysis/IVUsers.h                    |    2 +-
 include/llvm/Analysis/InlineCost.h                 |    4 +-
 include/llvm/Analysis/Interval.h                   |    2 +-
 include/llvm/Analysis/IntervalIterator.h           |    2 +-
 include/llvm/Analysis/IntervalPartition.h          |    2 +-
 include/llvm/Analysis/IteratedDominanceFrontier.h  |    2 +-
 include/llvm/Analysis/JumpInstrTableInfo.h         |    2 +-
 include/llvm/Analysis/LazyCallGraph.h              |    2 +-
 include/llvm/Analysis/LibCallAliasAnalysis.h       |    2 +-
 include/llvm/Analysis/Lint.h                       |    2 +-
 include/llvm/Analysis/Loads.h                      |    2 +-
 include/llvm/Analysis/LoopAccessAnalysis.h         |    2 +-
 include/llvm/Analysis/LoopInfo.h                   |    2 +-
 include/llvm/Analysis/LoopInfoImpl.h               |    2 +-
 include/llvm/Analysis/LoopPass.h                   |    2 +-
 include/llvm/Analysis/MemoryBuiltins.h             |    2 +-
 include/llvm/Analysis/MemoryDependenceAnalysis.h   |    2 +-
 include/llvm/Analysis/MemoryLocation.h             |    2 +-
 include/llvm/Analysis/Passes.h                     |    2 +-
 include/llvm/Analysis/PostDominators.h             |    2 +-
 include/llvm/Analysis/PtrUseVisitor.h              |    2 +-
 include/llvm/Analysis/RegionInfo.h                 |    2 +-
 include/llvm/Analysis/RegionPass.h                 |    2 +-
 include/llvm/Analysis/ScalarEvolution.h            |   82 +-
 include/llvm/Analysis/ScalarEvolutionExpander.h    |    2 +-
 include/llvm/Analysis/ScalarEvolutionExpressions.h |   80 +-
 .../llvm/Analysis/ScalarEvolutionNormalization.h   |    2 +-
 include/llvm/Analysis/TargetFolder.h               |    2 +-
 include/llvm/Analysis/TargetTransformInfo.h        |   13 +-
 include/llvm/Analysis/TargetTransformInfoImpl.h    |   10 +-
 include/llvm/Analysis/VectorUtils.h                |   56 +
 include/llvm/AsmParser/Parser.h                    |   56 +-
 include/llvm/AsmParser/SlotMapping.h               |   34 +
 include/llvm/Bitcode/BitCodes.h                    |    4 +-
 include/llvm/Bitcode/BitcodeWriterPass.h           |    2 +-
 include/llvm/Bitcode/BitstreamReader.h             |    2 +-
 include/llvm/Bitcode/BitstreamWriter.h             |    2 +-
 include/llvm/Bitcode/LLVMBitCodes.h                |    5 +-
 include/llvm/Bitcode/ReaderWriter.h                |    2 +-
 include/llvm/CodeGen/Analysis.h                    |    2 +-
 include/llvm/CodeGen/AsmPrinter.h                  |    2 +-
 include/llvm/CodeGen/BasicTTIImpl.h                |    2 +-
 include/llvm/CodeGen/CalcSpillWeights.h            |    2 +-
 include/llvm/CodeGen/CommandFlags.h                |   13 +-
 include/llvm/CodeGen/DFAPacketizer.h               |    4 +-
 include/llvm/CodeGen/DIE.h                         |  248 +-
 include/llvm/CodeGen/FastISel.h                    |   21 +-
 include/llvm/CodeGen/FaultMaps.h                   |  149 +-
 include/llvm/CodeGen/GCMetadata.h                  |    2 +-
 include/llvm/CodeGen/GCMetadataPrinter.h           |    2 +-
 include/llvm/CodeGen/GCStrategy.h                  |    2 +-
 include/llvm/CodeGen/GCs.h                         |    2 +-
 include/llvm/CodeGen/ISDOpcodes.h                  |    6 +-
 include/llvm/CodeGen/IntrinsicLowering.h           |    2 +-
 include/llvm/CodeGen/LatencyPriorityQueue.h        |    2 +-
 include/llvm/CodeGen/LexicalScopes.h               |    2 +-
 include/llvm/CodeGen/LiveInterval.h                |    2 +-
 include/llvm/CodeGen/LiveIntervalAnalysis.h        |    2 +-
 include/llvm/CodeGen/LivePhysRegs.h                |   22 +-
 include/llvm/CodeGen/LiveRangeEdit.h               |    2 +-
 include/llvm/CodeGen/LiveStackAnalysis.h           |    2 +-
 include/llvm/CodeGen/LiveVariables.h               |    2 +-
 include/llvm/CodeGen/MIRYamlMapping.h              |   76 +-
 include/llvm/CodeGen/MachineBasicBlock.h           |   19 +-
 include/llvm/CodeGen/MachineBlockFrequencyInfo.h   |    2 +-
 .../llvm/CodeGen/MachineBranchProbabilityInfo.h    |    2 +-
 include/llvm/CodeGen/MachineConstantPool.h         |    2 +-
 include/llvm/CodeGen/MachineDominanceFrontier.h    |    2 +-
 include/llvm/CodeGen/MachineDominators.h           |    2 +-
 include/llvm/CodeGen/MachineFrameInfo.h            |    2 +-
 include/llvm/CodeGen/MachineFunction.h             |    2 +-
 include/llvm/CodeGen/MachineFunctionAnalysis.h     |    2 +-
 include/llvm/CodeGen/MachineFunctionPass.h         |    2 +-
 include/llvm/CodeGen/MachineInstr.h                |    4 +-
 include/llvm/CodeGen/MachineInstrBuilder.h         |    9 +-
 include/llvm/CodeGen/MachineInstrBundle.h          |    2 +-
 include/llvm/CodeGen/MachineJumpTableInfo.h        |    2 +-
 include/llvm/CodeGen/MachineLoopInfo.h             |    2 +-
 include/llvm/CodeGen/MachineMemOperand.h           |   14 +-
 include/llvm/CodeGen/MachineModuleInfo.h           |    8 +-
 include/llvm/CodeGen/MachineOperand.h              |   24 +-
 include/llvm/CodeGen/MachineRegionInfo.h           |    2 +-
 include/llvm/CodeGen/MachineRegisterInfo.h         |    2 +-
 include/llvm/CodeGen/MachineSSAUpdater.h           |    2 +-
 include/llvm/CodeGen/MachineValueType.h            |    2 +-
 include/llvm/CodeGen/PBQPRAConstraint.h            |    2 +-
 include/llvm/CodeGen/Passes.h                      |    7 +-
 include/llvm/CodeGen/PseudoSourceValue.h           |    5 +-
 include/llvm/CodeGen/RegisterScavenging.h          |    2 +-
 include/llvm/CodeGen/ResourcePriorityQueue.h       |    2 +-
 include/llvm/CodeGen/RuntimeLibcalls.h             |    4 +-
 include/llvm/CodeGen/ScheduleDAG.h                 |    2 +-
 include/llvm/CodeGen/ScheduleHazardRecognizer.h    |    2 +-
 include/llvm/CodeGen/ScoreboardHazardRecognizer.h  |    2 +-
 include/llvm/CodeGen/SelectionDAG.h                |    3 +
 include/llvm/CodeGen/SelectionDAGISel.h            |    2 +-
 include/llvm/CodeGen/SelectionDAGNodes.h           |   38 +-
 include/llvm/CodeGen/SlotIndexes.h                 |    2 +-
 include/llvm/CodeGen/StackMaps.h                   |    2 +-
 .../llvm/CodeGen/TargetLoweringObjectFileImpl.h    |   11 +-
 include/llvm/CodeGen/ValueTypes.h                  |    2 +-
 include/llvm/CodeGen/VirtRegMap.h                  |    2 +-
 include/llvm/CodeGen/WinEHFuncInfo.h               |    2 +-
 include/llvm/Config/config.h.cmake                 |    3 -
 include/llvm/Config/config.h.in                    |    3 -
 include/llvm/DebugInfo/DIContext.h                 |    2 +-
 .../DebugInfo/DWARF/DWARFAbbreviationDeclaration.h |    2 +-
 .../llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h   |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h    |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFContext.h        |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h    |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h   |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugLine.h      |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h       |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFFormValue.h      |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h       |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFUnit.h           |    2 +-
 .../llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h  |    2 +-
 include/llvm/DebugInfo/PDB/IPDBDataStream.h        |    2 +-
 include/llvm/DebugInfo/PDB/IPDBEnumChildren.h      |    2 +-
 include/llvm/DebugInfo/PDB/IPDBLineNumber.h        |    2 +-
 include/llvm/DebugInfo/PDB/IPDBSession.h           |    2 +-
 include/llvm/DebugInfo/PDB/IPDBSourceFile.h        |    2 +-
 include/llvm/DebugInfo/PDB/PDBContext.h            |    2 +-
 include/llvm/DebugInfo/PDB/PDBExtras.h             |    2 +-
 include/llvm/DebugInfo/PDB/PDBSymDumper.h          |    2 +-
 include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h   |    2 +-
 include/llvm/DebugInfo/PDB/PDBSymbolBlock.h        |    2 +-
 include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h    |    2 +-
 include/llvm/ExecutionEngine/ExecutionEngine.h     |    2 +-
 include/llvm/ExecutionEngine/GenericValue.h        |    2 +-
 include/llvm/ExecutionEngine/MCJIT.h               |    2 +-
 include/llvm/ExecutionEngine/ObjectCache.h         |    2 +-
 .../ExecutionEngine/Orc/CompileOnDemandLayer.h     |    3 +-
 .../llvm/ExecutionEngine/Orc/LazyEmittingLayer.h   |    2 +-
 include/llvm/ExecutionEngine/Orc/NullResolver.h    |   36 +
 .../ExecutionEngine/Orc/ObjectTransformLayer.h     |  112 +
 .../llvm/ExecutionEngine/SectionMemoryManager.h    |    2 +-
 include/llvm/IR/Argument.h                         |    2 +-
 include/llvm/IR/AssemblyAnnotationWriter.h         |    2 +-
 include/llvm/IR/Attributes.h                       |    2 +-
 include/llvm/IR/AutoUpgrade.h                      |    2 +-
 include/llvm/IR/BasicBlock.h                       |    2 +-
 include/llvm/IR/CFG.h                              |    2 +-
 include/llvm/IR/CallSite.h                         |   25 +-
 include/llvm/IR/CallingConv.h                      |    4 +-
 include/llvm/IR/Comdat.h                           |    2 +-
 include/llvm/IR/Constant.h                         |   31 +-
 include/llvm/IR/ConstantFolder.h                   |    2 +-
 include/llvm/IR/ConstantRange.h                    |    2 +-
 include/llvm/IR/Constants.h                        |   79 +-
 include/llvm/IR/DIBuilder.h                        |   32 +-
 include/llvm/IR/DataLayout.h                       |    6 +-
 include/llvm/IR/DebugInfoMetadata.h                |   71 +
 include/llvm/IR/DerivedTypes.h                     |    2 +-
 include/llvm/IR/Dominators.h                       |    2 +-
 include/llvm/IR/Function.h                         |    2 +-
 include/llvm/IR/GVMaterializer.h                   |    2 +-
 include/llvm/IR/GlobalAlias.h                      |    2 +-
 include/llvm/IR/GlobalObject.h                     |    2 +-
 include/llvm/IR/GlobalValue.h                      |    9 +-
 include/llvm/IR/GlobalVariable.h                   |    7 +-
 include/llvm/IR/IRBuilder.h                        |   24 +-
 include/llvm/IR/IRPrintingPasses.h                 |    2 +-
 include/llvm/IR/InlineAsm.h                        |    2 +-
 include/llvm/IR/InstIterator.h                     |    2 +-
 include/llvm/IR/InstVisitor.h                      |    2 +-
 include/llvm/IR/InstrTypes.h                       |    8 +-
 include/llvm/IR/Instruction.h                      |    6 +-
 include/llvm/IR/Instructions.h                     |  196 +-
 include/llvm/IR/IntrinsicInst.h                    |    2 +-
 include/llvm/IR/Intrinsics.h                       |    4 +-
 include/llvm/IR/IntrinsicsARM.td                   |    2 -
 include/llvm/IR/IntrinsicsPowerPC.td               |   26 +
 include/llvm/IR/IntrinsicsX86.td                   | 1167 ++++-
 include/llvm/IR/LLVMContext.h                      |    2 +-
 include/llvm/IR/LegacyPassManager.h                |    4 +-
 include/llvm/IR/LegacyPassManagers.h               |    2 +-
 include/llvm/IR/LegacyPassNameParser.h             |    2 +-
 include/llvm/IR/Mangler.h                          |   31 +-
 include/llvm/IR/Metadata.def                       |    1 +
 include/llvm/IR/Metadata.h                         |   14 +-
 include/llvm/IR/Module.h                           |    4 +-
 include/llvm/IR/ModuleSlotTracker.h                |   68 +
 include/llvm/IR/NoFolder.h                         |    2 +-
 include/llvm/IR/OperandTraits.h                    |    2 +-
 include/llvm/IR/Operator.h                         |    2 +-
 include/llvm/IR/PassManager.h                      |    2 +-
 include/llvm/IR/PassManagerInternal.h              |    2 +-
 include/llvm/IR/Statepoint.h                       |  187 +-
 include/llvm/IR/SymbolTableListTraits.h            |    2 +-
 include/llvm/IR/Type.h                             |    2 +-
 include/llvm/IR/TypeFinder.h                       |    2 +-
 include/llvm/IR/Use.h                              |    2 +-
 include/llvm/IR/User.h                             |    2 +-
 include/llvm/IR/Value.def                          |   90 +
 include/llvm/IR/Value.h                            |   38 +-
 include/llvm/IR/ValueHandle.h                      |   14 +-
 include/llvm/IR/ValueSymbolTable.h                 |    2 +-
 include/llvm/IR/Verifier.h                         |    2 +-
 include/llvm/IRReader/IRReader.h                   |    2 +-
 include/llvm/InitializePasses.h                    |    2 +-
 include/llvm/LTO/LTOCodeGenerator.h                |    2 +-
 include/llvm/LTO/LTOModule.h                       |   33 +-
 include/llvm/LibDriver/LibDriver.h                 |    4 +-
 include/llvm/LineEditor/LineEditor.h               |    2 +-
 include/llvm/Linker/Linker.h                       |    2 +-
 include/llvm/MC/MCAsmBackend.h                     |    2 +-
 include/llvm/MC/MCAsmInfo.h                        |    4 +-
 include/llvm/MC/MCAsmInfoCOFF.h                    |    2 +-
 include/llvm/MC/MCCodeEmitter.h                    |    2 +-
 include/llvm/MC/MCELFObjectWriter.h                |    2 +-
 include/llvm/MC/MCExternalSymbolizer.h             |    2 +-
 include/llvm/MC/MCFixedLenDisassembler.h           |    2 +-
 include/llvm/MC/MCFixup.h                          |    2 +-
 include/llvm/MC/MCFixupKindInfo.h                  |    2 +-
 include/llvm/MC/MCInstrAnalysis.h                  |    2 +-
 include/llvm/MC/MCInstrDesc.h                      |    4 +-
 include/llvm/MC/MCInstrInfo.h                      |    2 +-
 include/llvm/MC/MCInstrItineraries.h               |    2 +-
 include/llvm/MC/MCMachObjectWriter.h               |    2 +-
 include/llvm/MC/MCObjectWriter.h                   |    2 +-
 include/llvm/MC/MCParser/MCAsmLexer.h              |    2 +-
 include/llvm/MC/MCParser/MCAsmParser.h             |   13 +-
 include/llvm/MC/MCParser/MCAsmParserExtension.h    |    2 +-
 include/llvm/MC/MCParser/MCAsmParserUtils.h        |   33 +
 include/llvm/MC/MCRegisterInfo.h                   |    2 +-
 include/llvm/MC/MCRelocationInfo.h                 |    2 +-
 include/llvm/MC/MCSchedule.h                       |    2 +-
 include/llvm/MC/MCStreamer.h                       |    2 +-
 include/llvm/MC/MCSubtargetInfo.h                  |    2 +-
 include/llvm/MC/MCSymbol.h                         |  127 +-
 include/llvm/MC/MCSymbolCOFF.h                     |    2 +-
 include/llvm/MC/MCSymbolELF.h                      |    2 +-
 include/llvm/MC/MCSymbolMachO.h                    |    2 +-
 include/llvm/MC/MCSymbolizer.h                     |    2 +-
 include/llvm/MC/MCTargetAsmParser.h                |    2 +-
 include/llvm/MC/MCWin64EH.h                        |    2 +-
 include/llvm/MC/MCWinCOFFObjectWriter.h            |    2 +-
 include/llvm/MC/MCWinCOFFStreamer.h                |    2 +-
 include/llvm/MC/MCWinEH.h                          |    4 +-
 include/llvm/MC/MachineLocation.h                  |    2 +-
 include/llvm/MC/StringTableBuilder.h               |    2 +-
 include/llvm/MC/YAML.h                             |    4 +-
 include/llvm/Object/Archive.h                      |    4 +-
 include/llvm/Object/ArchiveWriter.h                |    2 +-
 include/llvm/Object/Binary.h                       |    4 +-
 include/llvm/Object/COFF.h                         |   70 +-
 include/llvm/Object/COFFYAML.h                     |    6 +-
 include/llvm/Object/ELF.h                          |  470 +-
 include/llvm/Object/ELFObjectFile.h                |  589 ++-
 include/llvm/Object/ELFTypes.h                     |  125 +-
 include/llvm/Object/Error.h                        |    2 +
 include/llvm/Object/IRObjectFile.h                 |    4 +-
 include/llvm/Object/MachO.h                        |   34 +-
 include/llvm/Object/MachOUniversal.h               |    6 +-
 include/llvm/Object/ObjectFile.h                   |  134 +-
 include/llvm/Object/RelocVisitor.h                 |   16 +-
 include/llvm/Object/StackMapParser.h               |  442 ++
 include/llvm/Object/SymbolSize.h                   |   23 +
 include/llvm/Object/SymbolicFile.h                 |    6 +-
 include/llvm/Option/Arg.h                          |    5 +-
 include/llvm/Option/ArgList.h                      |   51 +-
 include/llvm/Option/OptSpecifier.h                 |    4 +-
 include/llvm/Option/OptTable.h                     |   12 +-
 include/llvm/Pass.h                                |    2 +-
 include/llvm/PassAnalysisSupport.h                 |   77 +-
 include/llvm/PassInfo.h                            |    2 +-
 include/llvm/PassRegistry.h                        |    2 +-
 include/llvm/PassSupport.h                         |    2 +-
 include/llvm/Passes/PassBuilder.h                  |    2 +-
 include/llvm/ProfileData/CoverageMapping.h         |    2 +-
 include/llvm/ProfileData/CoverageMappingReader.h   |    2 +-
 include/llvm/ProfileData/InstrProf.h               |   13 +
 include/llvm/ProfileData/InstrProfReader.h         |   50 +-
 include/llvm/Support/ARMEHABI.h                    |    6 +-
 include/llvm/Support/ARMWinEH.h                    |    6 +-
 include/llvm/Support/ArrayRecycler.h               |    2 +-
 include/llvm/Support/Atomic.h                      |    4 +-
 include/llvm/Support/BlockFrequency.h              |    2 +-
 include/llvm/Support/BranchProbability.h           |    2 +-
 include/llvm/Support/COM.h                         |    4 +-
 include/llvm/Support/Casting.h                     |    2 +-
 include/llvm/Support/CodeGen.h                     |    2 +-
 include/llvm/Support/CrashRecoveryContext.h        |    2 +-
 include/llvm/Support/DOTGraphTraits.h              |    2 +-
 include/llvm/Support/DataStream.h                  |    2 +-
 include/llvm/Support/Debug.h                       |    2 +-
 include/llvm/Support/Dwarf.h                       |    5 +
 include/llvm/Support/DynamicLibrary.h              |    4 +-
 include/llvm/Support/ELF.h                         |    8 +-
 include/llvm/Support/Errc.h                        |    2 +-
 include/llvm/Support/ErrorHandling.h               |    2 +-
 include/llvm/Support/FileSystem.h                  |    4 +-
 include/llvm/Support/FileUtilities.h               |    2 +-
 include/llvm/Support/FormattedStream.h             |    2 +-
 include/llvm/Support/GCOV.h                        |    2 +-
 include/llvm/Support/GenericDomTree.h              |    2 +-
 include/llvm/Support/GenericDomTreeConstruction.h  |    2 +-
 include/llvm/Support/GraphWriter.h                 |    4 +-
 include/llvm/Support/Host.h                        |    4 +-
 include/llvm/Support/LineIterator.h                |    2 +-
 include/llvm/Support/MD5.h                         |    2 +-
 include/llvm/Support/ManagedStatic.h               |    2 +-
 include/llvm/Support/MathExtras.h                  |    2 +-
 include/llvm/Support/Memory.h                      |    4 +-
 include/llvm/Support/MemoryObject.h                |    2 +-
 include/llvm/Support/MipsABIFlags.h                |    4 +-
 include/llvm/Support/Mutex.h                       |    4 +-
 include/llvm/Support/MutexGuard.h                  |    2 +-
 include/llvm/Support/PluginLoader.h                |    2 +-
 include/llvm/Support/Process.h                     |    4 +-
 include/llvm/Support/Program.h                     |    4 +-
 include/llvm/Support/RWMutex.h                     |    4 +-
 include/llvm/Support/RandomNumberGenerator.h       |    2 +-
 include/llvm/Support/Recycler.h                    |    2 +-
 include/llvm/Support/RecyclingAllocator.h          |    2 +-
 include/llvm/Support/Regex.h                       |    2 +-
 include/llvm/Support/Registry.h                    |    2 +-
 include/llvm/Support/Signals.h                     |    4 +-
 include/llvm/Support/SourceMgr.h                   |    2 +-
 include/llvm/Support/StreamingMemoryObject.h       |    2 +-
 include/llvm/Support/StringPool.h                  |    2 +-
 include/llvm/Support/StringSaver.h                 |    2 +-
 include/llvm/Support/SystemUtils.h                 |    2 +-
 include/llvm/Support/TargetParser.h                |   15 +
 include/llvm/Support/TargetRegistry.h              |    2 +-
 include/llvm/Support/TargetSelect.h                |    2 +-
 include/llvm/Support/ThreadLocal.h                 |    4 +-
 include/llvm/Support/Threading.h                   |    2 +-
 include/llvm/Support/TimeValue.h                   |    4 +-
 include/llvm/Support/Timer.h                       |    2 +-
 include/llvm/Support/ToolOutputFile.h              |    2 +-
 include/llvm/Support/UniqueLock.h                  |    2 +-
 include/llvm/Support/Valgrind.h                    |    4 +-
 include/llvm/Support/Watchdog.h                    |    4 +-
 include/llvm/Support/circular_raw_ostream.h        |    2 +-
 include/llvm/Support/raw_os_ostream.h              |    2 +-
 include/llvm/Support/raw_ostream.h                 |    2 +-
 include/llvm/Support/type_traits.h                 |    2 +-
 include/llvm/TableGen/Error.h                      |    2 +-
 include/llvm/TableGen/Record.h                     |   12 +-
 include/llvm/TableGen/StringMatcher.h              |    2 +-
 include/llvm/Target/TargetCallingConv.h            |    4 +-
 include/llvm/Target/TargetFrameLowering.h          |    2 +-
 include/llvm/Target/TargetInstrInfo.h              |    2 +-
 include/llvm/Target/TargetIntrinsicInfo.h          |    2 +-
 include/llvm/Target/TargetLowering.h               |   42 +-
 include/llvm/Target/TargetLoweringObjectFile.h     |    9 +-
 include/llvm/Target/TargetMachine.h                |    2 +-
 include/llvm/Target/TargetOptions.h                |   11 +-
 include/llvm/Target/TargetRecip.h                  |    2 +-
 include/llvm/Target/TargetRegisterInfo.h           |    6 +-
 include/llvm/Target/TargetSelectionDAG.td          |    1 +
 include/llvm/Target/TargetSelectionDAGInfo.h       |    2 +-
 include/llvm/Target/TargetSubtargetInfo.h          |    2 +-
 include/llvm/Transforms/IPO.h                      |    2 +-
 include/llvm/Transforms/IPO/InlinerPass.h          |    2 +-
 include/llvm/Transforms/InstCombine/InstCombine.h  |    2 +-
 include/llvm/Transforms/Instrumentation.h          |    2 +-
 include/llvm/Transforms/ObjCARC.h                  |    2 +-
 include/llvm/Transforms/Scalar.h                   |    2 +-
 include/llvm/Transforms/Scalar/EarlyCSE.h          |    2 +-
 .../llvm/Transforms/Scalar/LowerExpectIntrinsic.h  |    2 +-
 include/llvm/Transforms/Scalar/SimplifyCFG.h       |    2 +-
 .../llvm/Transforms/Utils/ASanStackFrameLayout.h   |    2 +-
 include/llvm/Transforms/Utils/BasicBlockUtils.h    |    8 +-
 include/llvm/Transforms/Utils/BuildLibCalls.h      |    2 +-
 include/llvm/Transforms/Utils/Cloning.h            |    2 +-
 include/llvm/Transforms/Utils/CodeExtractor.h      |    2 +-
 include/llvm/Transforms/Utils/CtorUtils.h          |    2 +-
 include/llvm/Transforms/Utils/GlobalStatus.h       |    2 +-
 include/llvm/Transforms/Utils/IntegerDivision.h    |    2 +-
 include/llvm/Transforms/Utils/Local.h              |    2 +-
 include/llvm/Transforms/Utils/LoopUtils.h          |    2 +-
 include/llvm/Transforms/Utils/ModuleUtils.h        |    2 +-
 include/llvm/Transforms/Utils/PromoteMemToReg.h    |    2 +-
 include/llvm/Transforms/Utils/SSAUpdater.h         |    2 +-
 include/llvm/Transforms/Utils/SSAUpdaterImpl.h     |    2 +-
 include/llvm/Transforms/Utils/SimplifyLibCalls.h   |    2 +-
 include/llvm/Transforms/Utils/SymbolRewriter.h     |    4 +-
 .../llvm/Transforms/Utils/UnifyFunctionExitNodes.h |    2 +-
 include/llvm/Transforms/Utils/UnrollLoop.h         |    2 +-
 include/llvm/Transforms/Utils/ValueMapper.h        |    2 +-
 include/llvm/Transforms/Utils/VectorUtils.h        |  205 -
 include/llvm/Transforms/Vectorize.h                |    2 +-
 include/llvm/module.modulemap                      |    1 +
 lib/Analysis/AliasAnalysis.cpp                     |    4 +-
 lib/Analysis/AliasAnalysisCounter.cpp              |    7 +-
 lib/Analysis/AliasAnalysisEvaluator.cpp            |   26 +-
 lib/Analysis/AliasDebugger.cpp                     |    2 +-
 lib/Analysis/AliasSetTracker.cpp                   |   58 +-
 lib/Analysis/BasicAliasAnalysis.cpp                |   75 +-
 lib/Analysis/BlockFrequencyInfoImpl.cpp            |    2 +-
 lib/Analysis/CFG.cpp                               |   16 +-
 lib/Analysis/CFGPrinter.cpp                        |    8 +-
 lib/Analysis/CFLAliasAnalysis.cpp                  |   18 +-
 lib/Analysis/CMakeLists.txt                        |    1 +
 lib/Analysis/CaptureTracking.cpp                   |  129 +-
 lib/Analysis/Delinearization.cpp                   |    2 +-
 lib/Analysis/DependenceAnalysis.cpp                |   25 +-
 lib/Analysis/DivergenceAnalysis.cpp                |    2 +-
 lib/Analysis/DomPrinter.cpp                        |    2 +-
 lib/Analysis/IPA/CallGraphSCCPass.cpp              |    2 +-
 lib/Analysis/IPA/CallPrinter.cpp                   |    2 +-
 lib/Analysis/IPA/GlobalsModRef.cpp                 |    6 +-
 lib/Analysis/IPA/InlineCost.cpp                    |   82 +-
 lib/Analysis/InstCount.cpp                         |    2 +-
 lib/Analysis/LazyValueInfo.cpp                     |    4 +-
 lib/Analysis/Lint.cpp                              |    9 +-
 lib/Analysis/Loads.cpp                             |   26 +-
 lib/Analysis/LoopAccessAnalysis.cpp                |   52 +-
 lib/Analysis/LoopPass.cpp                          |    2 +-
 lib/Analysis/MemDepPrinter.cpp                     |    2 +-
 lib/Analysis/MemDerefPrinter.cpp                   |    2 +-
 lib/Analysis/MemoryDependenceAnalysis.cpp          |   18 +-
 lib/Analysis/ModuleDebugInfoPrinter.cpp            |    2 +-
 lib/Analysis/RegionPrinter.cpp                     |    2 +-
 lib/Analysis/ScalarEvolution.cpp                   |   54 +-
 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp      |    5 +-
 lib/Analysis/ScalarEvolutionExpander.cpp           |   21 +-
 lib/Analysis/ScopedNoAliasAA.cpp                   |    4 +-
 lib/Analysis/StratifiedSets.h                      |    2 +-
 lib/Analysis/TargetTransformInfo.cpp               |    5 +
 lib/Analysis/TypeBasedAliasAnalysis.cpp            |    7 +-
 lib/Analysis/VectorUtils.cpp                       |  213 +
 lib/AsmParser/LLLexer.cpp                          |   15 +-
 lib/AsmParser/LLParser.cpp                         |   27 +
 lib/AsmParser/LLParser.h                           |   11 +-
 lib/AsmParser/Parser.cpp                           |   20 +-
 lib/Bitcode/Reader/BitcodeReader.cpp               |   81 +-
 lib/Bitcode/Writer/BitcodeWriter.cpp               |   15 +-
 lib/Bitcode/Writer/BitcodeWriterPass.cpp           |    2 +-
 lib/Bitcode/Writer/ValueEnumerator.cpp             |    6 +-
 lib/Bitcode/Writer/ValueEnumerator.h               |    2 +-
 lib/CodeGen/AggressiveAntiDepBreaker.h             |    8 +-
 lib/CodeGen/AllocationOrder.h                      |    2 +-
 lib/CodeGen/AntiDepBreaker.h                       |    4 +-
 lib/CodeGen/AsmPrinter/AddressPool.h               |    2 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp              |   21 +-
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp         |    7 +-
 lib/CodeGen/AsmPrinter/ByteStreamer.h              |    2 +-
 lib/CodeGen/AsmPrinter/CMakeLists.txt              |    1 +
 lib/CodeGen/AsmPrinter/DIE.cpp                     |   38 +-
 lib/CodeGen/AsmPrinter/DIEHash.cpp                 |   10 +-
 lib/CodeGen/AsmPrinter/DIEHash.h                   |    4 +-
 lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h |    2 +-
 lib/CodeGen/AsmPrinter/DebugLocEntry.h             |    4 +-
 lib/CodeGen/AsmPrinter/DebugLocStream.cpp          |   46 +
 lib/CodeGen/AsmPrinter/DebugLocStream.h            |   70 +-
 lib/CodeGen/AsmPrinter/DwarfAccelTable.h           |    2 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp        |  113 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h          |   35 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp              |   50 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.h                |  108 +-
 lib/CodeGen/AsmPrinter/DwarfException.h            |    6 +-
 lib/CodeGen/AsmPrinter/DwarfExpression.h           |    2 +-
 lib/CodeGen/AsmPrinter/DwarfFile.cpp               |    6 +-
 lib/CodeGen/AsmPrinter/DwarfFile.h                 |    2 +-
 lib/CodeGen/AsmPrinter/DwarfStringPool.h           |    2 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.cpp               |  117 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.h                 |   15 +-
 lib/CodeGen/AsmPrinter/EHStreamer.h                |    4 +-
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp          |    2 +-
 lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp   |    2 +-
 lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h     |    2 +-
 lib/CodeGen/AsmPrinter/WinException.cpp            |   36 +-
 lib/CodeGen/AsmPrinter/WinException.h              |    9 +-
 lib/CodeGen/AtomicExpandPass.cpp                   |    2 +-
 lib/CodeGen/BranchFolding.cpp                      |   81 +-
 lib/CodeGen/BranchFolding.h                        |    4 +-
 lib/CodeGen/CMakeLists.txt                         |    1 +
 lib/CodeGen/CodeGenPrepare.cpp                     |    2 +-
 lib/CodeGen/CoreCLRGC.cpp                          |    2 +-
 lib/CodeGen/CriticalAntiDepBreaker.h               |    4 +-
 lib/CodeGen/DFAPacketizer.cpp                      |    2 +-
 lib/CodeGen/DeadMachineInstructionElim.cpp         |    2 +-
 lib/CodeGen/EdgeBundles.cpp                        |    2 +-
 lib/CodeGen/ExecutionDepsFix.cpp                   |    4 +-
 lib/CodeGen/FaultMaps.cpp                          |   36 +
 lib/CodeGen/GCMetadata.cpp                         |    2 +-
 lib/CodeGen/GCRootLowering.cpp                     |    2 +-
 lib/CodeGen/IfConversion.cpp                       |  293 +-
 lib/CodeGen/ImplicitNullChecks.cpp                 |    9 +-
 lib/CodeGen/InlineSpiller.cpp                      |    4 +-
 lib/CodeGen/InterferenceCache.h                    |    2 +-
 lib/CodeGen/InterleavedAccessPass.cpp              |  286 ++
 lib/CodeGen/LiveDebugVariables.h                   |    2 +-
 lib/CodeGen/LivePhysRegs.cpp                       |   41 +
 lib/CodeGen/MIRParser/CMakeLists.txt               |    2 +
 lib/CodeGen/MIRParser/MILexer.cpp                  |  199 +
 lib/CodeGen/MIRParser/MILexer.h                    |   96 +
 lib/CodeGen/MIRParser/MIParser.cpp                 |  423 ++
 lib/CodeGen/MIRParser/MIParser.h                   |   41 +
 lib/CodeGen/MIRParser/MIRParser.cpp                |   99 +-
 lib/CodeGen/MIRPrinter.cpp                         |  163 +-
 lib/CodeGen/MachineBasicBlock.cpp                  |   48 +-
 lib/CodeGen/MachineBlockPlacement.cpp              |    6 +-
 lib/CodeGen/MachineCombiner.cpp                    |   51 +-
 lib/CodeGen/MachineCopyPropagation.cpp             |    2 +-
 lib/CodeGen/MachineFunction.cpp                    |    7 +-
 lib/CodeGen/MachineFunctionPrinterPass.cpp         |    2 +-
 lib/CodeGen/MachineInstr.cpp                       |   90 +-
 lib/CodeGen/MachineModuleInfo.cpp                  |  129 +-
 lib/CodeGen/MachineSSAUpdater.cpp                  |    2 +-
 lib/CodeGen/MachineScheduler.cpp                   |    6 +-
 lib/CodeGen/MachineTraceMetrics.cpp                |   22 +-
 lib/CodeGen/MachineVerifier.cpp                    |    4 +-
 lib/CodeGen/OptimizePHIs.cpp                       |    2 +-
 lib/CodeGen/PHIElimination.cpp                     |    2 +-
 lib/CodeGen/PeepholeOptimizer.cpp                  |    2 +-
 lib/CodeGen/PostRASchedulerList.cpp                |    2 +-
 lib/CodeGen/RegAllocFast.cpp                       |    2 +-
 lib/CodeGen/RegisterCoalescer.cpp                  |    3 +-
 lib/CodeGen/RegisterCoalescer.h                    |    2 +-
 lib/CodeGen/ScheduleDAGInstrs.cpp                  |    6 +-
 lib/CodeGen/ScheduleDAGPrinter.cpp                 |    2 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp           |   87 +-
 lib/CodeGen/SelectionDAG/FastISel.cpp              |   23 +-
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp          |    2 +
 lib/CodeGen/SelectionDAG/InstrEmitter.h            |    4 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp           |   27 +-
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  |    6 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp         |    2 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp     |    6 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp   |    3 +-
 lib/CodeGen/SelectionDAG/SDNodeDbgValue.h          |    2 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp       |    3 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp     |   55 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp    |    6 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h      |    3 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp          |   51 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   |   40 +-
 lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp    |   21 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp      |   23 +-
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp   |    2 +-
 lib/CodeGen/SelectionDAG/StatepointLowering.cpp    |    6 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp        |   45 +-
 lib/CodeGen/ShadowStackGCLowering.cpp              |    4 +-
 lib/CodeGen/Spiller.h                              |    2 +-
 lib/CodeGen/SplitKit.h                             |    6 +-
 lib/CodeGen/StatepointExampleGC.cpp                |    2 +-
 lib/CodeGen/TailDuplication.cpp                    |    9 +-
 lib/CodeGen/TargetLoweringBase.cpp                 |   15 +-
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp       |   53 +-
 lib/CodeGen/TargetOptionsImpl.cpp                  |    7 -
 lib/CodeGen/TwoAddressInstructionPass.cpp          |   20 +-
 lib/CodeGen/UnreachableBlockElim.cpp               |    2 +-
 lib/CodeGen/WinEHPrepare.cpp                       |   92 +-
 lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp      |    2 +-
 lib/DebugInfo/DWARF/DWARFContext.cpp               |   11 +-
 lib/DebugInfo/DWARF/DWARFFormValue.cpp             |    2 +-
 lib/DebugInfo/DWARF/SyntaxHighlighting.h           |    6 +-
 lib/DebugInfo/PDB/PDBSymbolFunc.cpp                |    2 +-
 lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp     |    2 +-
 lib/ExecutionEngine/ExecutionEngine.cpp            |    4 +-
 .../IntelJITEvents/IntelJITEventListener.cpp       |   30 +-
 lib/ExecutionEngine/Interpreter/Interpreter.h      |    2 +-
 lib/ExecutionEngine/MCJIT/MCJIT.cpp                |    9 +-
 lib/ExecutionEngine/MCJIT/MCJIT.h                  |    2 +-
 .../OProfileJIT/OProfileJITEventListener.cpp       |   22 +-
 lib/ExecutionEngine/Orc/CMakeLists.txt             |    1 +
 lib/ExecutionEngine/Orc/NullResolver.cpp           |   27 +
 lib/ExecutionEngine/Orc/OrcMCJITReplacement.h      |    3 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp    |   50 +-
 .../RuntimeDyld/RuntimeDyldCOFF.cpp                |   21 +-
 .../RuntimeDyld/RuntimeDyldChecker.cpp             |    2 +-
 .../RuntimeDyld/RuntimeDyldCheckerImpl.h           |    2 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp |   44 +-
 .../RuntimeDyld/RuntimeDyldMachO.cpp               |   19 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h |    3 +-
 .../RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h    |   12 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h  |    2 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h      |    5 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOI386.h     |   15 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h   |    2 +-
 lib/IR/AsmWriter.cpp                               |  176 +-
 lib/IR/AttributeImpl.h                             |    2 +-
 lib/IR/ConstantFold.h                              |    2 +-
 lib/IR/Constants.cpp                               |  199 +-
 lib/IR/DIBuilder.cpp                               |   91 +-
 lib/IR/DebugInfo.cpp                               |    4 +
 lib/IR/DebugInfoMetadata.cpp                       |   17 +
 lib/IR/DiagnosticInfo.cpp                          |    2 +-
 lib/IR/GCOV.cpp                                    |    2 +-
 lib/IR/Globals.cpp                                 |   30 +-
 lib/IR/IRPrintingPasses.cpp                        |    2 +-
 lib/IR/Instruction.cpp                             |   17 +-
 lib/IR/Instructions.cpp                            |  103 +-
 lib/IR/LLVMContextImpl.cpp                         |    4 +-
 lib/IR/LLVMContextImpl.h                           |   31 +-
 lib/IR/LegacyPassManager.cpp                       |   10 +-
 lib/IR/Mangler.cpp                                 |   54 +-
 lib/IR/Operator.cpp                                |    2 +-
 lib/IR/Pass.cpp                                    |    2 +-
 lib/IR/SymbolTableListTraitsImpl.h                 |    2 +-
 lib/IR/Use.cpp                                     |    2 +-
 lib/IR/User.cpp                                    |    2 +-
 lib/IR/Value.cpp                                   |    2 +-
 lib/IR/Verifier.cpp                                |  184 +-
 lib/LTO/LTOCodeGenerator.cpp                       |    2 +-
 lib/LTO/LTOModule.cpp                              |   24 +-
 lib/LibDriver/LibDriver.cpp                        |   33 +-
 lib/Linker/LinkModules.cpp                         |    8 +-
 lib/MC/ELFObjectWriter.cpp                         |   15 +-
 lib/MC/MCAssembler.cpp                             |   22 +-
 lib/MC/MCDisassembler/MCExternalSymbolizer.cpp     |    2 +-
 lib/MC/MCDwarf.cpp                                 |    4 +-
 lib/MC/MCExpr.cpp                                  |    2 +-
 lib/MC/MCNullStreamer.cpp                          |    2 +-
 lib/MC/MCObjectFileInfo.cpp                        |    5 +
 lib/MC/MCObjectStreamer.cpp                        |   11 +-
 lib/MC/MCParser/AsmParser.cpp                      |  207 +-
 lib/MC/MCParser/COFFAsmParser.cpp                  |    2 +-
 lib/MC/MCParser/ELFAsmParser.cpp                   |   16 +-
 lib/MC/MCSymbol.cpp                                |    9 +-
 lib/MC/MCSymbolELF.cpp                             |    4 +-
 lib/MC/MCWin64EH.cpp                               |    2 +-
 lib/MC/MCWinEH.cpp                                 |    4 +-
 lib/MC/WinCOFFObjectWriter.cpp                     |    2 +-
 lib/MC/WinCOFFStreamer.cpp                         |    2 +-
 lib/Object/CMakeLists.txt                          |    1 +
 lib/Object/COFFObjectFile.cpp                      |  166 +-
 lib/Object/COFFYAML.cpp                            |    6 +-
 lib/Object/ELFYAML.cpp                             |    4 +-
 lib/Object/Error.cpp                               |    4 +
 lib/Object/IRObjectFile.cpp                        |    4 +-
 lib/Object/MachOObjectFile.cpp                     |  169 +-
 lib/Object/MachOUniversal.cpp                      |   20 +-
 lib/Object/Object.cpp                              |   32 +-
 lib/Object/ObjectFile.cpp                          |   13 +-
 lib/Object/RecordStreamer.h                        |    2 +-
 lib/Object/SymbolSize.cpp                          |  100 +
 lib/Option/ArgList.cpp                             |   17 +-
 lib/Option/OptTable.cpp                            |   27 +-
 lib/ProfileData/CoverageMapping.cpp                |    6 +-
 lib/ProfileData/CoverageMappingReader.cpp          |    8 +-
 lib/ProfileData/CoverageMappingWriter.cpp          |    2 +-
 lib/ProfileData/InstrProf.cpp                      |    2 +-
 lib/ProfileData/InstrProfIndexed.h                 |    2 +-
 lib/ProfileData/InstrProfReader.cpp                |  112 +-
 lib/ProfileData/InstrProfWriter.cpp                |    2 +-
 lib/ProfileData/SampleProf.cpp                     |    2 +-
 lib/Support/APFloat.cpp                            |    4 +-
 lib/Support/APInt.cpp                              |    2 +-
 lib/Support/APSInt.cpp                             |   19 +
 lib/Support/ARMBuildAttrs.cpp                      |    6 +-
 lib/Support/ARMWinEH.cpp                           |    6 +-
 lib/Support/Allocator.cpp                          |    2 +-
 lib/Support/CMakeLists.txt                         |    2 +-
 lib/Support/CommandLine.cpp                        |    4 +-
 lib/Support/CrashRecoveryContext.cpp               |    2 +-
 lib/Support/DAGDeltaAlgorithm.cpp                  |    2 +-
 lib/Support/DataStream.cpp                         |    2 +-
 lib/Support/Debug.cpp                              |    2 +-
 lib/Support/Dwarf.cpp                              |    3 +
 lib/Support/FileOutputBuffer.cpp                   |    2 +-
 lib/Support/GraphWriter.cpp                        |    2 +-
 lib/Support/LockFileManager.cpp                    |  111 +-
 lib/Support/MD5.cpp                                |    2 +-
 lib/Support/MathExtras.cpp                         |    2 +-
 lib/Support/MemoryBuffer.cpp                       |    4 +-
 lib/Support/Mutex.cpp                              |    2 +-
 lib/Support/RWMutex.cpp                            |    2 +-
 lib/Support/Statistic.cpp                          |    2 +-
 lib/Support/StreamingMemoryObject.cpp              |    2 +-
 lib/Support/TargetParser.cpp                       |   58 +-
 lib/Support/TimeValue.cpp                          |    2 +-
 lib/Support/Timer.cpp                              |    4 +-
 lib/Support/Triple.cpp                             |   14 +
 lib/Support/Unix/Process.inc                       |    2 +-
 lib/Support/Unix/Program.inc                       |    4 +-
 lib/Support/Unix/ThreadLocal.inc                   |    2 +-
 lib/Support/Unix/TimeValue.inc                     |    2 +-
 lib/Support/Unix/Watchdog.inc                      |    4 +-
 lib/Support/Windows/DynamicLibrary.inc             |   30 +-
 lib/Support/Windows/Signals.inc                    |   63 +-
 lib/Support/YAMLParser.cpp                         |    8 +-
 lib/TableGen/Record.cpp                            |   10 +-
 lib/TableGen/TGLexer.h                             |    2 +-
 lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp   |    2 +-
 lib/Target/AArch64/AArch64BranchRelaxation.cpp     |    2 +-
 lib/Target/AArch64/AArch64CallingConvention.h      |    2 +-
 .../AArch64/AArch64CleanupLocalDynamicTLSPass.cpp  |    2 +-
 lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp    |    2 +-
 lib/Target/AArch64/AArch64FastISel.cpp             |   33 +-
 lib/Target/AArch64/AArch64FrameLowering.h          |    2 +-
 lib/Target/AArch64/AArch64ISelLowering.cpp         |  157 +-
 lib/Target/AArch64/AArch64ISelLowering.h           |    9 +
 lib/Target/AArch64/AArch64InstrFormats.td          |   49 +-
 lib/Target/AArch64/AArch64InstrInfo.cpp            |   22 +-
 lib/Target/AArch64/AArch64InstrInfo.td             |    8 +-
 lib/Target/AArch64/AArch64MCInstLower.cpp          |    3 +
 lib/Target/AArch64/AArch64MCInstLower.h            |    2 +-
 lib/Target/AArch64/AArch64MachineFunctionInfo.h    |    2 +-
 lib/Target/AArch64/AArch64PBQPRegAlloc.cpp         |    2 +-
 lib/Target/AArch64/AArch64PBQPRegAlloc.h           |    2 +-
 lib/Target/AArch64/AArch64SelectionDAGInfo.h       |    2 +-
 lib/Target/AArch64/AArch64StorePairSuppress.cpp    |    2 +-
 lib/Target/AArch64/AArch64Subtarget.h              |    2 +-
 lib/Target/AArch64/AArch64TargetMachine.cpp        |    4 +
 lib/Target/AArch64/AArch64TargetTransformInfo.cpp  |   20 +
 lib/Target/AArch64/AArch64TargetTransformInfo.h    |    5 +
 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp  |   31 +
 .../AArch64/InstPrinter/AArch64InstPrinter.h       |    2 +-
 .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp     |    4 +-
 .../MCTargetDesc/AArch64ELFObjectWriter.cpp        |    2 +-
 .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp    |   14 +-
 .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h     |    2 +-
 .../MCTargetDesc/AArch64MachObjectWriter.cpp       |    2 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.h         |   22 +-
 lib/Target/AMDGPU/AMDGPU.td                        |   13 +
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp             |  143 +-
 lib/Target/AMDGPU/AMDGPUAsmPrinter.h               |    4 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp           |    2 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.h                |    2 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.h          |    2 +-
 lib/Target/AMDGPU/AMDGPUSubtarget.cpp              |    6 +
 lib/Target/AMDGPU/AMDGPUSubtarget.h                |   20 +
 lib/Target/AMDGPU/AMDKernelCodeT.h                 |  121 +-
 lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp    |  314 +-
 lib/Target/AMDGPU/AsmParser/LLVMBuild.txt          |    2 +-
 lib/Target/AMDGPU/CMakeLists.txt                   |    1 +
 lib/Target/AMDGPU/LLVMBuild.txt                    |    4 +-
 .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp       |   12 +-
 .../AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp  |   11 +-
 lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h  |    4 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp     |   22 +
 .../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h       |    5 +-
 .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp   |  297 ++
 .../AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h     |   77 +
 lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt      |    1 +
 lib/Target/AMDGPU/Makefile                         |    2 +-
 lib/Target/AMDGPU/Processors.td                    |   15 +-
 lib/Target/AMDGPU/R600Defines.h                    |    4 +-
 lib/Target/AMDGPU/R600ISelLowering.h               |    2 +-
 lib/Target/AMDGPU/R600InstrInfo.cpp                |   11 +-
 lib/Target/AMDGPU/R600InstrInfo.h                  |    2 +-
 lib/Target/AMDGPU/R600MachineFunctionInfo.h        |    2 +-
 lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp  |    2 +-
 .../AMDGPU/R600TextureIntrinsicsReplacer.cpp       |    2 +-
 lib/Target/AMDGPU/SIDefines.h                      |   31 +-
 lib/Target/AMDGPU/SIISelLowering.cpp               |   11 +-
 lib/Target/AMDGPU/SIInstrInfo.cpp                  |   27 +-
 lib/Target/AMDGPU/SIInstrInfo.h                    |    2 +-
 lib/Target/AMDGPU/SIInstrInfo.td                   |    2 +-
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp         |   60 +
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h           |   34 +
 lib/Target/AMDGPU/Utils/CMakeLists.txt             |    3 +
 lib/Target/AMDGPU/Utils/LLVMBuild.txt              |   23 +
 lib/Target/AMDGPU/Utils/Makefile                   |   16 +
 lib/Target/ARM/ARM.h                               |    2 +-
 lib/Target/ARM/ARM.td                              |    4 +-
 lib/Target/ARM/ARMAsmPrinter.cpp                   |   10 +-
 lib/Target/ARM/ARMBaseInstrInfo.cpp                |   14 +-
 lib/Target/ARM/ARMBaseInstrInfo.h                  |    2 +-
 lib/Target/ARM/ARMCallingConv.h                    |    2 +-
 lib/Target/ARM/ARMConstantIslandPass.cpp           |    2 +-
 lib/Target/ARM/ARMConstantPoolValue.h              |    4 +-
 lib/Target/ARM/ARMExpandPseudoInsts.cpp            |    4 +-
 lib/Target/ARM/ARMFastISel.cpp                     |    2 +-
 lib/Target/ARM/ARMFeatures.h                       |    2 +-
 lib/Target/ARM/ARMFrameLowering.cpp                |    2 +-
 lib/Target/ARM/ARMFrameLowering.h                  |    2 +-
 lib/Target/ARM/ARMISelDAGToDAG.cpp                 |    2 +-
 lib/Target/ARM/ARMISelLowering.cpp                 |  163 +-
 lib/Target/ARM/ARMISelLowering.h                   |   13 +-
 lib/Target/ARM/ARMInstrInfo.cpp                    |    2 +-
 lib/Target/ARM/ARMInstrInfo.h                      |    2 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp           |  231 +-
 lib/Target/ARM/ARMMachineFunctionInfo.h            |    2 +-
 lib/Target/ARM/ARMOptimizeBarriersPass.cpp         |    2 +-
 lib/Target/ARM/ARMSelectionDAGInfo.h               |    2 +-
 lib/Target/ARM/ARMSubtarget.h                      |    2 +-
 lib/Target/ARM/ARMTargetMachine.cpp                |    4 +
 lib/Target/ARM/ARMTargetTransformInfo.cpp          |   25 +
 lib/Target/ARM/ARMTargetTransformInfo.h            |    5 +
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp          |   20 +-
 lib/Target/ARM/Disassembler/ARMDisassembler.cpp    |    4 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h  |    2 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h     |    2 +-
 lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h          |    4 +-
 lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp |    2 +-
 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp     |   55 +-
 lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h        |    4 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp    |    2 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h      |    2 +-
 .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp       |    2 +-
 lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp     |    2 +-
 .../ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp    |    2 +-
 lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp |    2 +-
 lib/Target/ARM/MLxExpansionPass.cpp                |    2 +-
 lib/Target/ARM/Thumb1FrameLowering.h               |    2 +-
 lib/Target/ARM/Thumb1InstrInfo.h                   |    2 +-
 lib/Target/ARM/Thumb2ITBlockPass.cpp               |    2 +-
 lib/Target/ARM/Thumb2InstrInfo.h                   |    2 +-
 lib/Target/ARM/Thumb2SizeReduction.cpp             |    2 +-
 lib/Target/ARM/ThumbRegisterInfo.h                 |    2 +-
 lib/Target/BPF/BPFAsmPrinter.cpp                   |    2 +-
 lib/Target/BPF/BPFFrameLowering.h                  |    2 +-
 lib/Target/BPF/BPFISelDAGToDAG.cpp                 |    2 +-
 lib/Target/BPF/BPFISelLowering.cpp                 |    2 +-
 lib/Target/BPF/BPFISelLowering.h                   |    2 +-
 lib/Target/BPF/BPFInstrInfo.h                      |    2 +-
 lib/Target/BPF/BPFMCInstLower.h                    |    2 +-
 lib/Target/BPF/BPFRegisterInfo.h                   |    2 +-
 lib/Target/BPF/BPFSubtarget.h                      |    2 +-
 lib/Target/BPF/BPFTargetMachine.cpp                |    2 +-
 lib/Target/BPF/BPFTargetMachine.h                  |    2 +-
 lib/Target/BPF/InstPrinter/BPFInstPrinter.h        |    2 +-
 lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp      |    2 +-
 lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp |    2 +-
 lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h         |    2 +-
 lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp   |    2 +-
 lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h      |    2 +-
 lib/Target/CppBackend/CPPBackend.cpp               |    3 +-
 lib/Target/CppBackend/CPPTargetMachine.h           |    2 +-
 .../Hexagon/Disassembler/HexagonDisassembler.cpp   |    2 +-
 lib/Target/Hexagon/Hexagon.h                       |    2 +-
 lib/Target/Hexagon/HexagonAsmPrinter.h             |    2 +-
 lib/Target/Hexagon/HexagonCFGOptimizer.cpp         |    2 +-
 lib/Target/Hexagon/HexagonExpandCondsets.cpp       |    2 +-
 lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp  |    2 +-
 lib/Target/Hexagon/HexagonFixupHwLoops.cpp         |    2 +-
 lib/Target/Hexagon/HexagonFrameLowering.cpp        |    2 +-
 lib/Target/Hexagon/HexagonFrameLowering.h          |    2 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp         |    2 +-
 lib/Target/Hexagon/HexagonISelLowering.h           |    2 +-
 lib/Target/Hexagon/HexagonInstrInfo.h              |    2 +-
 lib/Target/Hexagon/HexagonMachineFunctionInfo.h    |    2 +-
 lib/Target/Hexagon/HexagonMachineScheduler.h       |    2 +-
 lib/Target/Hexagon/HexagonPeephole.cpp             |    2 +-
 lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp      |    2 +-
 lib/Target/Hexagon/HexagonSelectionDAGInfo.h       |    2 +-
 .../Hexagon/HexagonSplitConst32AndConst64.cpp      |    2 +-
 lib/Target/Hexagon/HexagonTargetMachine.cpp        |    2 +-
 lib/Target/Hexagon/HexagonTargetStreamer.h         |    2 +-
 lib/Target/Hexagon/HexagonVLIWPacketizer.cpp       |   19 +-
 .../MCTargetDesc/HexagonELFObjectWriter.cpp        |    2 +-
 .../Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp  |    2 +-
 .../Hexagon/MCTargetDesc/HexagonMCCompound.cpp     |   10 +-
 .../Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp    |    2 +-
 .../Hexagon/MCTargetDesc/HexagonMCInstrInfo.h      |    4 +-
 .../Hexagon/MCTargetDesc/HexagonMCShuffler.h       |    2 +-
 .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp   |   11 +-
 .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.h     |    2 +-
 .../Hexagon/MCTargetDesc/HexagonShuffler.cpp       |    3 +
 lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h  |    5 +-
 lib/Target/LLVMBuild.txt                           |    1 +
 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h  |    2 +-
 lib/Target/MSP430/MSP430.h                         |    4 +-
 lib/Target/MSP430/MSP430BranchSelector.cpp         |    2 +-
 lib/Target/MSP430/MSP430FrameLowering.h            |    2 +-
 lib/Target/MSP430/MSP430ISelDAGToDAG.cpp           |    2 +-
 lib/Target/MSP430/MSP430ISelLowering.h             |    2 +-
 lib/Target/MSP430/MSP430InstrInfo.h                |    4 +-
 lib/Target/MSP430/MSP430MCInstLower.h              |    2 +-
 lib/Target/MSP430/MSP430MachineFunctionInfo.h      |    2 +-
 lib/Target/MSP430/MSP430SelectionDAGInfo.h         |    2 +-
 lib/Target/MSP430/MSP430Subtarget.h                |    2 +-
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp        |  435 +-
 lib/Target/Mips/Disassembler/MipsDisassembler.cpp  |   19 +-
 .../Mips/MCTargetDesc/MipsABIFlagsSection.cpp      |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsABIInfo.h         |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp    |    6 -
 lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h      |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h        |    4 +-
 .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp      |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp |   15 +-
 lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h          |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h    |    2 +-
 .../Mips/MCTargetDesc/MipsNaClELFStreamer.cpp      |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp  |    3 +
 .../Mips/MCTargetDesc/MipsTargetStreamer.cpp       |   53 +-
 lib/Target/Mips/MicroMips32r6InstrFormats.td       |   47 +
 lib/Target/Mips/MicroMips32r6InstrInfo.td          |   23 +
 lib/Target/Mips/MicroMipsInstrInfo.td              |    4 +
 lib/Target/Mips/Mips.h                             |    2 +-
 lib/Target/Mips/Mips16FrameLowering.h              |    2 +-
 lib/Target/Mips/Mips16HardFloat.cpp                |    2 +-
 lib/Target/Mips/Mips16HardFloatInfo.cpp            |    4 +-
 lib/Target/Mips/Mips16HardFloatInfo.h              |    4 +-
 lib/Target/Mips/Mips16ISelDAGToDAG.h               |    2 +-
 lib/Target/Mips/Mips16ISelLowering.cpp             |    2 +-
 lib/Target/Mips/Mips16ISelLowering.h               |    2 +-
 lib/Target/Mips/Mips16InstrInfo.h                  |    2 +-
 lib/Target/Mips/Mips64InstrInfo.td                 |   16 +-
 lib/Target/Mips/MipsAnalyzeImmediate.h             |    2 +-
 lib/Target/Mips/MipsAsmPrinter.cpp                 |    3 +-
 lib/Target/Mips/MipsAsmPrinter.h                   |    2 +-
 lib/Target/Mips/MipsCCState.h                      |    2 +-
 lib/Target/Mips/MipsFastISel.cpp                   |   13 +-
 lib/Target/Mips/MipsFrameLowering.h                |    2 +-
 lib/Target/Mips/MipsISelDAGToDAG.h                 |    2 +-
 lib/Target/Mips/MipsISelLowering.h                 |    4 +-
 lib/Target/Mips/MipsInstrInfo.h                    |    2 +-
 lib/Target/Mips/MipsInstrInfo.td                   |   42 +-
 lib/Target/Mips/MipsMCInstLower.cpp                |    6 +
 lib/Target/Mips/MipsMCInstLower.h                  |    2 +-
 lib/Target/Mips/MipsModuleISelDAGToDAG.cpp         |    2 +-
 lib/Target/Mips/MipsOptionRecord.h                 |    2 +
 lib/Target/Mips/MipsOs16.cpp                       |    2 +-
 lib/Target/Mips/MipsRegisterInfo.td                |   16 +
 lib/Target/Mips/MipsSEFrameLowering.cpp            |    2 +-
 lib/Target/Mips/MipsSEFrameLowering.h              |    2 +-
 lib/Target/Mips/MipsSEISelDAGToDAG.h               |    2 +-
 lib/Target/Mips/MipsSEISelLowering.h               |    2 +-
 lib/Target/Mips/MipsSEInstrInfo.h                  |    2 +-
 lib/Target/Mips/MipsSelectionDAGInfo.h             |    2 +-
 lib/Target/Mips/MipsSubtarget.h                    |    2 +-
 lib/Target/Mips/MipsTargetMachine.h                |    2 +-
 lib/Target/Mips/MipsTargetStreamer.h               |   38 +-
 lib/Target/NVPTX/CMakeLists.txt                    |    1 +
 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h    |    2 +-
 lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h      |    4 +-
 .../NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp       |    5 +-
 lib/Target/NVPTX/ManagedStringPool.h               |    2 +-
 lib/Target/NVPTX/NVPTX.h                           |   11 +-
 lib/Target/NVPTX/NVPTXAsmPrinter.cpp               |    4 +-
 lib/Target/NVPTX/NVPTXAsmPrinter.h                 |    2 +-
 lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp   |    2 +-
 .../NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp       |    2 +-
 lib/Target/NVPTX/NVPTXFrameLowering.cpp            |   46 +-
 lib/Target/NVPTX/NVPTXFrameLowering.h              |    2 +-
 lib/Target/NVPTX/NVPTXISelDAGToDAG.h               |    2 +-
 lib/Target/NVPTX/NVPTXISelLowering.cpp             |    7 +
 lib/Target/NVPTX/NVPTXISelLowering.h               |    2 +-
 lib/Target/NVPTX/NVPTXImageOptimizer.cpp           |    2 +-
 lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp          |    4 +
 lib/Target/NVPTX/NVPTXMachineFunctionInfo.h        |    2 +-
 lib/Target/NVPTX/NVPTXPeephole.cpp                 |  154 +
 lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp         |    2 +-
 lib/Target/NVPTX/NVPTXRegisterInfo.cpp             |    2 +-
 lib/Target/NVPTX/NVPTXRegisterInfo.td              |    2 +-
 lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp      |    2 +-
 lib/Target/NVPTX/NVPTXSubtarget.h                  |    2 +-
 lib/Target/NVPTX/NVPTXTargetMachine.cpp            |    4 +
 lib/Target/NVPTX/NVPTXUtilities.h                  |    2 +-
 lib/Target/NVPTX/NVVMReflect.cpp                   |    2 +-
 .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp    |    2 +-
 lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h    |    4 +-
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp       |    2 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h  |    2 +-
 .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp   |    2 +-
 lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h    |    4 +-
 lib/Target/PowerPC/PPC.h                           |    2 +-
 lib/Target/PowerPC/PPCBranchSelector.cpp           |    2 +-
 lib/Target/PowerPC/PPCCallingConv.h                |    2 +-
 lib/Target/PowerPC/PPCEarlyReturn.cpp              |    2 +-
 lib/Target/PowerPC/PPCFastISel.cpp                 |    6 +-
 lib/Target/PowerPC/PPCFrameLowering.h              |    2 +-
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp             |   25 +-
 lib/Target/PowerPC/PPCISelLowering.cpp             |  107 +-
 lib/Target/PowerPC/PPCISelLowering.h               |   11 +-
 lib/Target/PowerPC/PPCInstrAltivec.td              |   50 +
 lib/Target/PowerPC/PPCInstrBuilder.h               |    2 +-
 lib/Target/PowerPC/PPCInstrInfo.cpp                |   23 +-
 lib/Target/PowerPC/PPCInstrInfo.h                  |    2 +-
 lib/Target/PowerPC/PPCInstrVSX.td                  |   24 +-
 lib/Target/PowerPC/PPCLoopDataPrefetch.cpp         |    2 +-
 lib/Target/PowerPC/PPCLoopPreIncPrep.cpp           |    4 +-
 lib/Target/PowerPC/PPCMCInstLower.cpp              |    2 +-
 lib/Target/PowerPC/PPCSelectionDAGInfo.h           |    2 +-
 lib/Target/PowerPC/PPCSubtarget.h                  |    4 +-
 lib/Target/PowerPC/PPCTLSDynamicCall.cpp           |    2 +-
 lib/Target/PowerPC/PPCTOCRegDeps.cpp               |    2 +-
 lib/Target/PowerPC/PPCTargetStreamer.h             |    2 +-
 lib/Target/PowerPC/PPCVSXCopy.cpp                  |    2 +-
 lib/Target/PowerPC/PPCVSXFMAMutate.cpp             |    2 +-
 lib/Target/PowerPC/PPCVSXSwapRemoval.cpp           |   69 +-
 .../Sparc/Disassembler/SparcDisassembler.cpp       |    2 +-
 .../Sparc/MCTargetDesc/SparcELFObjectWriter.cpp    |    2 +-
 lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h    |    4 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h  |    2 +-
 lib/Target/Sparc/Sparc.h                           |    4 +-
 lib/Target/Sparc/SparcFrameLowering.h              |    2 +-
 lib/Target/Sparc/SparcISelLowering.h               |    2 +-
 lib/Target/Sparc/SparcInstrInfo.h                  |    2 +-
 lib/Target/Sparc/SparcInstrInfo.td                 |  148 +-
 lib/Target/Sparc/SparcMachineFunctionInfo.h        |    2 +-
 lib/Target/Sparc/SparcSelectionDAGInfo.h           |    2 +-
 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp         |   13 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp         |   12 +-
 lib/Target/WebAssembly/CMakeLists.txt              |   24 +
 lib/Target/WebAssembly/InstPrinter/CMakeLists.txt  |    3 +
 lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt   |   23 +
 lib/Target/WebAssembly/InstPrinter/Makefile        |   16 +
 .../InstPrinter/WebAssemblyInstPrinter.cpp         |   43 +
 .../InstPrinter/WebAssemblyInstPrinter.h           |   38 +
 lib/Target/WebAssembly/LLVMBuild.txt               |   32 +
 lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt |    4 +
 lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt  |   23 +
 lib/Target/WebAssembly/MCTargetDesc/Makefile       |   16 +
 .../MCTargetDesc/WebAssemblyMCAsmInfo.cpp          |   53 +
 .../MCTargetDesc/WebAssemblyMCAsmInfo.h            |   32 +
 .../MCTargetDesc/WebAssemblyMCTargetDesc.cpp       |   56 +
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h         |   53 +
 lib/Target/WebAssembly/Makefile                    |   19 +
 lib/Target/WebAssembly/README.txt                  |   15 +
 lib/Target/WebAssembly/TargetInfo/CMakeLists.txt   |    7 +
 lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt    |   23 +
 lib/Target/WebAssembly/TargetInfo/Makefile         |   15 +
 .../TargetInfo/WebAssemblyTargetInfo.cpp           |   30 +
 lib/Target/WebAssembly/WebAssembly.h               |   31 +
 lib/Target/WebAssembly/WebAssembly.td              |   62 +
 .../WebAssembly/WebAssemblyFrameLowering.cpp       |   74 +
 lib/Target/WebAssembly/WebAssemblyFrameLowering.h  |   48 +
 lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp |   73 +
 lib/Target/WebAssembly/WebAssemblyISelLowering.cpp |   63 +
 lib/Target/WebAssembly/WebAssemblyISelLowering.h   |   49 +
 lib/Target/WebAssembly/WebAssemblyInstrAtomics.td  |   46 +
 lib/Target/WebAssembly/WebAssemblyInstrFormats.td  |   28 +
 lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp    |   28 +
 lib/Target/WebAssembly/WebAssemblyInstrInfo.h      |   37 +
 lib/Target/WebAssembly/WebAssemblyInstrInfo.td     |   46 +
 lib/Target/WebAssembly/WebAssemblyInstrSIMD.td     |   15 +
 .../WebAssembly/WebAssemblyMachineFunctionInfo.cpp |   19 +
 .../WebAssembly/WebAssemblyMachineFunctionInfo.h   |   37 +
 lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp |   33 +
 lib/Target/WebAssembly/WebAssemblyRegisterInfo.h   |   35 +
 lib/Target/WebAssembly/WebAssemblyRegisterInfo.td  |   28 +
 .../WebAssembly/WebAssemblySelectionDAGInfo.cpp    |   23 +
 .../WebAssembly/WebAssemblySelectionDAGInfo.h      |   31 +
 lib/Target/WebAssembly/WebAssemblySubtarget.cpp    |   48 +
 lib/Target/WebAssembly/WebAssemblySubtarget.h      |   79 +
 .../WebAssembly/WebAssemblyTargetMachine.cpp       |  173 +
 lib/Target/WebAssembly/WebAssemblyTargetMachine.h  |   51 +
 .../WebAssembly/WebAssemblyTargetObjectFile.h      |   67 +
 .../WebAssembly/WebAssemblyTargetTransformInfo.cpp |   28 +
 .../WebAssembly/WebAssemblyTargetTransformInfo.h   |   87 +
 lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp |    2 +-
 lib/Target/X86/AsmParser/X86AsmInstrumentation.h   |    2 +-
 lib/Target/X86/AsmParser/X86Operand.h              |   16 +
 lib/Target/X86/Disassembler/X86Disassembler.cpp    |   10 +-
 .../X86/Disassembler/X86DisassemblerDecoder.cpp    |   39 +-
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h     |    2 +-
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h   |    2 +-
 lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp      |   32 +-
 lib/Target/X86/MCTargetDesc/X86BaseInfo.h          |   12 +-
 lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp |    2 +-
 .../X86/MCTargetDesc/X86ELFRelocationInfo.cpp      |   13 +-
 lib/Target/X86/MCTargetDesc/X86FixupKinds.h        |    4 +-
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h      |    4 +-
 .../X86/MCTargetDesc/X86MachORelocationInfo.cpp    |   16 +-
 .../X86/MCTargetDesc/X86MachObjectWriter.cpp       |    2 +-
 .../X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp    |    2 +-
 lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp |    2 +-
 lib/Target/X86/Utils/X86ShuffleDecode.cpp          |    2 +-
 lib/Target/X86/Utils/X86ShuffleDecode.h            |    2 +-
 lib/Target/X86/X86.h                               |    2 +-
 lib/Target/X86/X86AsmPrinter.cpp                   |   69 +-
 lib/Target/X86/X86AsmPrinter.h                     |    2 -
 lib/Target/X86/X86CallFrameOptimization.cpp        |  119 +-
 lib/Target/X86/X86CallingConv.h                    |    2 +-
 lib/Target/X86/X86FastISel.cpp                     |    7 +-
 lib/Target/X86/X86FixupLEAs.cpp                    |    2 +-
 lib/Target/X86/X86FloatingPoint.cpp                |    6 +-
 lib/Target/X86/X86FrameLowering.h                  |    2 +-
 lib/Target/X86/X86ISelDAGToDAG.cpp                 |   45 +-
 lib/Target/X86/X86ISelLowering.cpp                 |  708 ++-
 lib/Target/X86/X86ISelLowering.h                   |   17 +-
 lib/Target/X86/X86InstrAVX512.td                   |  794 ++--
 lib/Target/X86/X86InstrBuilder.h                   |    2 +-
 lib/Target/X86/X86InstrCompiler.td                 |   16 +-
 lib/Target/X86/X86InstrFPStack.td                  |   16 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td            |   58 +-
 lib/Target/X86/X86InstrInfo.cpp                    |  352 +-
 lib/Target/X86/X86InstrInfo.h                      |    4 +-
 lib/Target/X86/X86InstrInfo.td                     |   10 +-
 lib/Target/X86/X86InstrSSE.td                      |    9 +-
 lib/Target/X86/X86IntrinsicsInfo.h                 |  355 +-
 lib/Target/X86/X86MCInstLower.cpp                  |   10 +-
 lib/Target/X86/X86MachineFunctionInfo.h            |    2 +-
 lib/Target/X86/X86PadShortFunction.cpp             |    2 +-
 lib/Target/X86/X86RegisterInfo.cpp                 |   24 +-
 lib/Target/X86/X86RegisterInfo.h                   |   13 +-
 lib/Target/X86/X86SelectionDAGInfo.h               |    2 +-
 lib/Target/X86/X86Subtarget.h                      |    2 +-
 lib/Target/X86/X86TargetMachine.cpp                |   15 +-
 lib/Target/X86/X86TargetMachine.h                  |    2 +-
 lib/Target/X86/X86TargetObjectFile.cpp             |   64 +-
 lib/Target/X86/X86TargetTransformInfo.cpp          |   15 +
 lib/Target/X86/X86TargetTransformInfo.h            |    2 +
 lib/Target/X86/X86VZeroUpper.cpp                   |    2 +-
 lib/Target/X86/X86WinEHState.cpp                   |   17 +-
 .../XCore/Disassembler/XCoreDisassembler.cpp       |    2 +-
 .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp       |    2 +-
 lib/Target/XCore/XCore.h                           |    2 +-
 lib/Target/XCore/XCoreFrameLowering.h              |    2 +-
 lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp    |    2 +-
 lib/Target/XCore/XCoreISelLowering.h               |    4 +-
 lib/Target/XCore/XCoreInstrInfo.cpp                |   25 +-
 lib/Target/XCore/XCoreInstrInfo.h                  |    2 +-
 lib/Target/XCore/XCoreLowerThreadLocal.cpp         |    2 +-
 lib/Target/XCore/XCoreMCInstLower.h                |    2 +-
 lib/Target/XCore/XCoreMachineFunctionInfo.h        |    2 +-
 lib/Target/XCore/XCoreSelectionDAGInfo.h           |    2 +-
 lib/Target/XCore/XCoreSubtarget.h                  |    2 +-
 lib/Target/XCore/XCoreTargetStreamer.h             |    2 +-
 lib/Transforms/Hello/Hello.cpp                     |    4 +-
 lib/Transforms/IPO/ArgumentPromotion.cpp           |    2 +-
 lib/Transforms/IPO/BarrierNoopPass.cpp             |    2 +-
 lib/Transforms/IPO/ConstantMerge.cpp               |    2 +-
 lib/Transforms/IPO/DeadArgumentElimination.cpp     |    4 +-
 lib/Transforms/IPO/ExtractGV.cpp                   |    2 +-
 lib/Transforms/IPO/FunctionAttrs.cpp               |    6 +-
 lib/Transforms/IPO/GlobalDCE.cpp                   |    2 +-
 lib/Transforms/IPO/GlobalOpt.cpp                   |   10 +-
 lib/Transforms/IPO/IPConstantPropagation.cpp       |    2 +-
 lib/Transforms/IPO/InlineAlways.cpp                |    2 +-
 lib/Transforms/IPO/Inliner.cpp                     |   17 +-
 lib/Transforms/IPO/LoopExtractor.cpp               |    4 +-
 lib/Transforms/IPO/LowerBitSets.cpp                |   10 +-
 lib/Transforms/IPO/MergeFunctions.cpp              |    2 +-
 lib/Transforms/IPO/PartialInlining.cpp             |    2 +-
 lib/Transforms/IPO/PruneEH.cpp                     |   65 +-
 lib/Transforms/IPO/StripSymbols.cpp                |    8 +-
 lib/Transforms/InstCombine/InstCombineAddSub.cpp   |   28 +-
 lib/Transforms/InstCombine/InstCombineCompares.cpp |    6 +-
 .../InstCombine/InstCombineMulDivRem.cpp           |    2 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp      |   37 +-
 .../InstCombine/InstructionCombining.cpp           |    4 +-
 .../Instrumentation/AddressSanitizer.cpp           |   19 +-
 lib/Transforms/Instrumentation/BoundsChecking.cpp  |    2 +-
 .../Instrumentation/DataFlowSanitizer.cpp          |    2 +-
 lib/Transforms/Instrumentation/GCOVProfiling.cpp   |    4 +-
 lib/Transforms/Instrumentation/InstrProfiling.cpp  |    2 +-
 lib/Transforms/Instrumentation/MemorySanitizer.cpp |   17 +
 lib/Transforms/Instrumentation/SafeStack.cpp       |    7 +-
 .../Instrumentation/SanitizerCoverage.cpp          |    7 +
 lib/Transforms/ObjCARC/BlotMapVector.h             |    2 +-
 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp           |    2 +-
 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp    |    5 +-
 lib/Transforms/ObjCARC/ObjCARCContract.cpp         |    2 +-
 lib/Transforms/ObjCARC/ObjCARCExpand.cpp           |    2 +-
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp             |   20 +-
 lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp      |    8 +-
 lib/Transforms/Scalar/ADCE.cpp                     |    2 +-
 lib/Transforms/Scalar/AlignmentFromAssumptions.cpp |    2 +-
 lib/Transforms/Scalar/BDCE.cpp                     |    2 +-
 lib/Transforms/Scalar/ConstantHoisting.cpp         |    2 +-
 lib/Transforms/Scalar/ConstantProp.cpp             |    2 +-
 .../Scalar/CorrelatedValuePropagation.cpp          |    2 +-
 lib/Transforms/Scalar/DCE.cpp                      |    4 +-
 lib/Transforms/Scalar/DeadStoreElimination.cpp     |    2 +-
 lib/Transforms/Scalar/EarlyCSE.cpp                 |   12 +-
 lib/Transforms/Scalar/FlattenCFGPass.cpp           |    2 +-
 lib/Transforms/Scalar/Float2Int.cpp                |    2 +-
 lib/Transforms/Scalar/GVN.cpp                      |   20 +-
 lib/Transforms/Scalar/IndVarSimplify.cpp           |   15 +-
 .../Scalar/InductiveRangeCheckElimination.cpp      |    4 +-
 lib/Transforms/Scalar/JumpThreading.cpp            |    2 +-
 lib/Transforms/Scalar/LICM.cpp                     |    4 +-
 lib/Transforms/Scalar/LoadCombine.cpp              |    2 +-
 lib/Transforms/Scalar/LoopDeletion.cpp             |    2 +-
 lib/Transforms/Scalar/LoopDistribute.cpp           |   54 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp       |    2 +-
 lib/Transforms/Scalar/LoopInstSimplify.cpp         |    2 +-
 lib/Transforms/Scalar/LoopRerollPass.cpp           |    2 +-
 lib/Transforms/Scalar/LoopRotation.cpp             |    2 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp       |   18 +-
 lib/Transforms/Scalar/LoopUnrollPass.cpp           |    2 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp             |  194 +-
 lib/Transforms/Scalar/LowerAtomic.cpp              |    2 +-
 lib/Transforms/Scalar/LowerExpectIntrinsic.cpp     |    2 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp          |    6 +-
 lib/Transforms/Scalar/MergedLoadStoreMotion.cpp    |    2 +-
 lib/Transforms/Scalar/NaryReassociate.cpp          |   69 +-
 lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp  |    2 +-
 lib/Transforms/Scalar/PlaceSafepoints.cpp          |    4 +-
 lib/Transforms/Scalar/Reassociate.cpp              |   14 +-
 lib/Transforms/Scalar/Reg2Mem.cpp                  |    2 +-
 lib/Transforms/Scalar/RewriteStatepointsForGC.cpp  |  183 +-
 lib/Transforms/Scalar/SCCP.cpp                     |    2 +-
 lib/Transforms/Scalar/SROA.cpp                     |   12 +-
 lib/Transforms/Scalar/SampleProfile.cpp            |    2 +-
 lib/Transforms/Scalar/ScalarReplAggregates.cpp     |    4 +-
 lib/Transforms/Scalar/SimplifyCFGPass.cpp          |   13 +-
 .../Scalar/StraightLineStrengthReduce.cpp          |    6 +-
 lib/Transforms/Scalar/TailRecursionElimination.cpp |    4 +-
 lib/Transforms/Utils/ASanStackFrameLayout.cpp      |    2 +-
 lib/Transforms/Utils/BasicBlockUtils.cpp           |    7 +-
 lib/Transforms/Utils/BreakCriticalEdges.cpp        |    2 +-
 lib/Transforms/Utils/BypassSlowDivision.cpp        |    4 +-
 lib/Transforms/Utils/CloneFunction.cpp             |    2 +-
 lib/Transforms/Utils/CloneModule.cpp               |    4 +
 lib/Transforms/Utils/CtorUtils.cpp                 |    2 +-
 lib/Transforms/Utils/FlattenCFG.cpp                |    2 +-
 lib/Transforms/Utils/InlineFunction.cpp            |    2 +-
 lib/Transforms/Utils/InstructionNamer.cpp          |    2 +-
 lib/Transforms/Utils/LCSSA.cpp                     |    2 +-
 lib/Transforms/Utils/LoopSimplify.cpp              |    5 +-
 lib/Transforms/Utils/LoopUnrollRuntime.cpp         |    2 +-
 lib/Transforms/Utils/LowerSwitch.cpp               |    2 +-
 lib/Transforms/Utils/MetaRenamer.cpp               |    2 +-
 lib/Transforms/Utils/SSAUpdater.cpp                |    2 +-
 lib/Transforms/Utils/SimplifyCFG.cpp               |  201 +-
 lib/Transforms/Utils/SimplifyIndVar.cpp            |    2 +-
 lib/Transforms/Utils/SimplifyInstructions.cpp      |    2 +-
 lib/Transforms/Utils/SymbolRewriter.cpp            |    2 +-
 lib/Transforms/Vectorize/BBVectorize.cpp           |    2 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp         |   61 +-
 lib/Transforms/Vectorize/SLPVectorizer.cpp         |    2 +-
 test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll |   26 +
 .../LoopAccessAnalysis/non-wrapping-pointer.ll     |   41 +
 test/Assembler/dimodule.ll                         |   15 +
 test/Bindings/llvm-c/ARM/disassemble.test          |   21 +
 test/Bindings/llvm-c/ARM/lit.local.cfg             |    2 +
 test/Bindings/llvm-c/X86/disassemble.test          |   23 +
 test/Bindings/llvm-c/X86/lit.local.cfg             |    2 +
 test/Bindings/llvm-c/disassemble.test              |   43 -
 test/Bindings/llvm-c/lit.local.cfg                 |    4 -
 test/Bitcode/Inputs/PR23310.bc                     |  Bin 0 -> 181848 bytes
 test/Bitcode/PR23310.test                          |    1 +
 test/CodeGen/AArch64/aarch-multipart.ll            |   18 +
 .../AArch64/aarch64-interleaved-accesses.ll        |  197 +
 test/CodeGen/AMDGPU/commute-shifts.ll              |   33 +
 test/CodeGen/AMDGPU/elf.ll                         |   12 +-
 test/CodeGen/AMDGPU/hsa.ll                         |   29 +-
 test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll        |    4 +-
 test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll  |   37 +
 test/CodeGen/ARM/arm-interleaved-accesses.ll       |  204 +
 test/CodeGen/ARM/build-attributes.ll               |   19 +-
 test/CodeGen/ARM/fnattr-trap.ll                    |   40 +
 test/CodeGen/ARM/ldrd.ll                           |   29 +-
 test/CodeGen/ARM/load-store-flags.ll               |   43 +
 test/CodeGen/ARM/wrong-t2stmia-size-opt.ll         |    2 +-
 test/CodeGen/Generic/vector-casts.ll               |   56 +-
 test/CodeGen/MIR/X86/expected-machine-operand.mir  |   21 +
 test/CodeGen/MIR/X86/expected-number-after-bb.mir  |   37 +
 test/CodeGen/MIR/X86/global-value-operands.mir     |   49 +
 test/CodeGen/MIR/X86/immediate-operands.mir        |   40 +
 test/CodeGen/MIR/X86/large-index-number-error.mir  |   35 +
 test/CodeGen/MIR/X86/lit.local.cfg                 |    2 +
 .../MIR/X86/machine-basic-block-operands.mir       |   75 +
 test/CodeGen/MIR/X86/machine-instructions.mir      |   25 +
 test/CodeGen/MIR/X86/missing-comma.mir             |   21 +
 test/CodeGen/MIR/X86/missing-instruction.mir       |   19 +
 test/CodeGen/MIR/X86/named-registers.mir           |   23 +
 test/CodeGen/MIR/X86/null-register-operands.mir    |   24 +
 test/CodeGen/MIR/X86/register-mask-operands.mir    |   43 +
 test/CodeGen/MIR/X86/undefined-global-value.mir    |   28 +
 .../MIR/X86/undefined-named-global-value.mir       |   28 +
 test/CodeGen/MIR/X86/unknown-instruction.mir       |   21 +
 .../MIR/X86/unknown-machine-basic-block.mir        |   38 +
 .../MIR/X86/unknown-named-machine-basic-block.mir  |   39 +
 test/CodeGen/MIR/X86/unknown-register.mir          |   22 +
 test/CodeGen/MIR/X86/unrecognized-character.mir    |   19 +
 test/CodeGen/MIR/basic-blocks.mir                  |   18 +-
 .../MIR/expected-eof-after-successor-mbb.mir       |   29 +
 .../expected-mbb-reference-for-successor-mbb.mir   |   29 +
 .../MIR/machine-basic-block-redefinition-error.mir |   17 +
 .../MIR/machine-basic-block-unknown-name.mir       |    3 +-
 test/CodeGen/MIR/machine-function.mir              |    8 +-
 test/CodeGen/MIR/register-info.mir                 |   36 +
 test/CodeGen/MIR/successor-basic-blocks.mir        |   58 +
 test/CodeGen/NVPTX/call-with-alloca-buffer.ll      |    3 +-
 test/CodeGen/NVPTX/extloadv.ll                     |   15 +
 test/CodeGen/NVPTX/globals_lowering.ll             |   15 +
 test/CodeGen/NVPTX/intrinsics.ll                   |    2 +
 test/CodeGen/NVPTX/local-stack-frame.ll            |   72 +-
 test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll         |   13 +-
 test/CodeGen/PowerPC/builtins-ppc-elf2-abi.ll      |  165 +
 test/CodeGen/PowerPC/lxvw4x-bug.ll                 |   25 +
 test/CodeGen/PowerPC/swaps-le-3.ll                 |   24 +
 test/CodeGen/PowerPC/swaps-le-4.ll                 |   27 +
 test/CodeGen/PowerPC/vec_mergeow.ll                |  101 +
 test/CodeGen/PowerPC/vsx.ll                        |  294 +-
 test/CodeGen/Thumb2/float-ops.ll                   |    2 +-
 test/CodeGen/WinEH/cppeh-prepared-catch.ll         |    4 +-
 test/CodeGen/X86/StackColoring.ll                  |   33 +-
 test/CodeGen/X86/asm-mismatched-types.ll           |  135 +
 test/CodeGen/X86/asm-reject-reg-type-mismatch.ll   |    6 +-
 test/CodeGen/X86/avx512-build-vector.ll            |   10 -
 test/CodeGen/X86/avx512-fma-intrinsics.ll          |  579 ++-
 test/CodeGen/X86/avx512-fma.ll                     |  122 +-
 test/CodeGen/X86/avx512-gather-scatter-intrin.ll   |  748 ++-
 test/CodeGen/X86/avx512-intrinsics.ll              |  177 +-
 test/CodeGen/X86/avx512-shuffle.ll                 |  392 --
 test/CodeGen/X86/avx512bw-intrinsics.ll            |   79 +
 test/CodeGen/X86/avx512bwvl-intrinsics.ll          | 1103 ++++-
 test/CodeGen/X86/avx512vl-intrinsics.ll            |  209 +
 test/CodeGen/X86/coff-weak.ll                      |    9 +
 test/CodeGen/X86/commute-two-addr.ll               |    2 +-
 test/CodeGen/X86/dllexport-x86_64.ll               |   60 +-
 test/CodeGen/X86/dllexport.ll                      |   71 +-
 test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll  |  204 +
 test/CodeGen/X86/fma-intrinsics-x86.ll             |  493 ++
 test/CodeGen/X86/fma-intrinsics-x86_64.ll          |  278 --
 test/CodeGen/X86/fma-phi-213-to-231.ll             |  283 +-
 test/CodeGen/X86/fma.ll                            |   83 +-
 test/CodeGen/X86/fma3-intrinsics.ll                |  150 -
 test/CodeGen/X86/fold-load-binops.ll               |  142 +
 test/CodeGen/X86/fold-vector-sext-crash2.ll        |   92 +
 test/CodeGen/X86/fold-vector-shl-crash.ll          |    9 +
 test/CodeGen/X86/fp-fast.ll                        |   78 -
 test/CodeGen/X86/implicit-null-check-negative.ll   |   11 +-
 test/CodeGen/X86/implicit-null-check.ll            |   28 +-
 test/CodeGen/X86/machine-combiner.ll               |   99 +
 test/CodeGen/X86/movtopush.ll                      |   93 +-
 test/CodeGen/X86/or-branch.ll                      |   31 +-
 test/CodeGen/X86/pr23900.ll                        |   29 +
 test/CodeGen/X86/recip-fastmath.ll                 |   38 +-
 test/CodeGen/X86/rrlist-livereg-corrutpion.ll      |   26 +
 test/CodeGen/X86/sdiv-exact.ll                     |   13 +-
 test/CodeGen/X86/seh-catch-all-win32.ll            |   29 +-
 test/CodeGen/X86/seh-filter-no-personality.ll      |   33 +
 test/CodeGen/X86/seh-safe-div-win32.ll             |   46 +-
 test/CodeGen/X86/shift-combine.ll                  |   59 +
 test/CodeGen/X86/sqrt-fastmath.ll                  |   70 +-
 test/CodeGen/X86/stack-folding-fp-sse42.ll         |    8 +-
 test/CodeGen/X86/stack-folding-int-avx2.ll         |   16 +-
 test/CodeGen/X86/statepoint-stackmap-format.ll     |    5 +-
 test/CodeGen/X86/system-intrinsics-64.ll           |   33 +
 test/CodeGen/X86/system-intrinsics.ll              |   17 +
 test/CodeGen/X86/twoaddr-lea.ll                    |    3 +-
 test/CodeGen/X86/vec_int_to_fp.ll                  |   50 +-
 test/CodeGen/X86/vec_shift8.ll                     |  527 ---
 test/CodeGen/X86/vector-sext.ll                    |   40 +
 test/CodeGen/X86/vector-shift-ashr-128.ll          | 1041 +++++
 test/CodeGen/X86/vector-shift-ashr-256.ll          |  767 ++++
 test/CodeGen/X86/vector-shift-lshr-128.ll          |  778 ++++
 test/CodeGen/X86/vector-shift-lshr-256.ll          |  548 +++
 test/CodeGen/X86/vector-shift-shl-128.ll           |  639 +++
 test/CodeGen/X86/vector-shift-shl-256.ll           |  459 ++
 test/CodeGen/X86/vector-shuffle-128-v16.ll         |   42 +-
 test/CodeGen/X86/vector-shuffle-128-v8.ll          |   24 +-
 test/CodeGen/X86/vector-shuffle-256-v4.ll          |   95 +-
 test/CodeGen/X86/vector-shuffle-256-v8.ll          |   13 +-
 test/CodeGen/X86/vector-shuffle-512-v8.ll          |  713 ++-
 test/CodeGen/X86/widen_conv-3.ll                   |    2 +-
 test/CodeGen/X86/win64_params.ll                   |    9 +-
 test/CodeGen/X86/win_cst_pool.ll                   |   13 +
 test/CodeGen/X86/win_ftol2.ll                      |   22 +
 test/CodeGen/X86/xor.ll                            |   21 +-
 test/DebugInfo/AArch64/bitfields.ll                |   73 +
 test/DebugInfo/ARM/bitfield.ll                     |   45 +
 test/DebugInfo/X86/DIModule.ll                     |   25 +
 test/DebugInfo/X86/asm-macro-line-number.s         |   14 +
 test/DebugInfo/X86/bitfields.ll                    |   73 +
 test/DebugInfo/X86/debug-info-packed-struct.ll     |  198 +
 test/DebugInfo/X86/debug-loc-empty-entries.ll      |   66 +
 test/DebugInfo/X86/dwarf-public-names.ll           |    4 +-
 test/DebugInfo/dwarfdump-invalid.test              |    6 +-
 .../X86/asm_more_registers_than_available.ll       |   56 +
 .../do-not-instrument-llvm-metadata-darwin.ll      |    5 +-
 .../do-not-instrument-llvm-metadata.ll             |    5 +-
 test/Instrumentation/SanitizerCoverage/coverage.ll |    9 +
 test/Instrumentation/ThreadSanitizer/atomic.ll     | 1995 ++++----
 test/Linker/comdat10.ll                            |    6 +
 test/MC/AArch64/alias-addsubimm.s                  |   94 +
 test/MC/AArch64/basic-a64-diagnostics.s            |   10 +-
 test/MC/AMDGPU/hsa.s                               |  233 +
 test/MC/AMDGPU/hsa_code_object_isa_noargs.s        |   16 +
 test/MC/ARM/directive-fpu-multiple.s               |    5 +
 test/MC/ARM/directive-type-diagnostics.s           |   10 +
 test/MC/ARM/thumb_set-diagnostics.s                |   28 +
 test/MC/ARM/thumb_set.s                            |    2 -
 test/MC/COFF/ARM/directive-type-diagnostics.s      |   10 +
 test/MC/COFF/ARM/lit.local.cfg                     |    3 +
 test/MC/Disassembler/Mips/micromips.txt            |    4 +
 test/MC/Disassembler/Mips/micromips32r6.txt        |   16 +
 test/MC/Disassembler/Mips/micromips_le.txt         |    4 +
 test/MC/Disassembler/Mips/mips1/valid-mips1.txt    |  180 +-
 test/MC/Disassembler/Mips/mips2.txt                |   13 -
 test/MC/Disassembler/Mips/mips2/valid-mips2.txt    |  268 +-
 test/MC/Disassembler/Mips/mips3/valid-mips3.txt    |  356 +-
 test/MC/Disassembler/Mips/mips32.txt               |  451 --
 .../Disassembler/Mips/mips32/valid-mips32-el.txt   |    2 +
 test/MC/Disassembler/Mips/mips32/valid-mips32.txt  |  249 +-
 test/MC/Disassembler/Mips/mips32_le.txt            |    6 +
 test/MC/Disassembler/Mips/mips32r2.txt             |  453 --
 .../Mips/mips32r2/valid-mips32r2-el.txt            |  174 +
 .../Mips/mips32r2/valid-mips32r2-le.txt            |  172 -
 .../Disassembler/Mips/mips32r2/valid-mips32r2.txt  |  284 +-
 test/MC/Disassembler/Mips/mips32r2_le.txt          |    6 +
 .../Mips/mips32r3/valid-mips32r3-el.txt            |  171 +
 .../Mips/mips32r3/valid-mips32r3-le.txt            |  169 -
 .../Disassembler/Mips/mips32r3/valid-mips32r3.txt  |  282 +-
 .../Mips/mips32r5/valid-mips32r5-el.txt            |  171 +
 .../Mips/mips32r5/valid-mips32r5-le.txt            |  169 -
 .../Disassembler/Mips/mips32r5/valid-mips32r5.txt  |  282 +-
 test/MC/Disassembler/Mips/mips32r6.txt             |  127 -
 .../Mips/mips32r6/valid-mips32r6-el.txt            |    2 +
 .../Disassembler/Mips/mips32r6/valid-mips32r6.txt  |  261 +-
 test/MC/Disassembler/Mips/mips4/valid-mips4.txt    |  392 +-
 test/MC/Disassembler/Mips/mips64.txt               |   93 -
 .../Disassembler/Mips/mips64/valid-mips64-el.txt   |    4 +
 test/MC/Disassembler/Mips/mips64/valid-mips64.txt  |  398 +-
 test/MC/Disassembler/Mips/mips64r2.txt             |   90 -
 .../Mips/mips64r2/valid-mips64r2-el.txt            |    4 +
 .../Disassembler/Mips/mips64r2/valid-mips64r2.txt  |  428 +-
 .../Mips/mips64r3/valid-mips64r3-el.txt            |    4 +
 .../Disassembler/Mips/mips64r3/valid-mips64r3.txt  |  398 +-
 .../Mips/mips64r5/valid-mips64r5-el.txt            |    4 +
 .../Disassembler/Mips/mips64r5/valid-mips64r5.txt  |  398 +-
 test/MC/Disassembler/Mips/mips64r6.txt             |  145 -
 .../Mips/mips64r6/valid-mips64r6-el.txt            |    4 +
 .../Disassembler/Mips/mips64r6/valid-mips64r6.txt  |  314 +-
 .../MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt |    6 +
 test/MC/Disassembler/X86/x86-16.txt                |    2 +
 test/MC/Disassembler/X86/x86-32.txt                |    3 +
 test/MC/Disassembler/X86/x86-64.txt                |    6 +
 test/MC/ELF/discriminator.s                        |    6 +-
 test/MC/ELF/many-sections-3.s                      |  107 +
 test/MC/ELF/relax-arith.s                          |  122 +-
 test/MC/ELF/relax-arith2.s                         |  118 +
 test/MC/ELF/relax-arith3.s                         |   76 +
 test/MC/ELF/symver-pr23914.s                       |   16 +
 test/MC/ELF/undef-temp.s                           |    4 +
 test/MC/ELF/undef.s                                |   10 -
 test/MC/ELF/undef2.s                               |   18 -
 test/MC/MachO/ARM/directive-type-diagnostics.s     |   10 +
 test/MC/MachO/cstexpr-gotpcrel-64.ll               |    9 +
 test/MC/Mips/branch-pseudos.s                      |   74 +-
 test/MC/Mips/expr1.s                               |   15 +
 test/MC/Mips/micromips32r6/invalid.s               |    6 +
 test/MC/Mips/micromips32r6/valid.s                 |    9 +
 test/MC/Mips/mips-cop0-reginfo.s                   |   28 +
 test/MC/Mips/mips-expansions-bad.s                 |   27 +-
 test/MC/Mips/mips-expansions.s                     |  558 ++-
 test/MC/Mips/mips-fpu-instructions.s               |    2 +-
 test/MC/Mips/mips32/valid.s                        |    4 +-
 test/MC/Mips/mips32r2/valid.s                      |    4 +-
 test/MC/Mips/mips32r3/valid.s                      |    4 +-
 test/MC/Mips/mips32r5/valid.s                      |    4 +-
 test/MC/Mips/mips32r6/relocations.s                |   12 +-
 test/MC/Mips/mips32r6/valid.s                      |    2 +
 test/MC/Mips/mips64-expansions.s                   |  180 +
 test/MC/Mips/mips64/valid.s                        |    6 +-
 test/MC/Mips/mips64r2/valid.s                      |    6 +-
 test/MC/Mips/mips64r3/valid.s                      |    6 +-
 test/MC/Mips/mips64r5/valid.s                      |    6 +-
 test/MC/Mips/mips64r6/relocations.s                |   40 +-
 test/MC/Mips/mips64r6/valid.s                      |    4 +
 test/MC/Mips/mips_abi_flags_xx.s                   |   27 +-
 test/MC/Mips/mips_abi_flags_xx_set.s               |   21 +-
 test/MC/Mips/module-hardfloat.s                    |   26 +
 test/MC/Mips/module-softfloat.s                    |   20 +
 test/MC/Mips/relocation.s                          |    4 +-
 test/MC/Mips/set-nomacro.s                         |   19 +
 test/MC/Mips/set-oddspreg-nooddspreg-error.s       |   10 +
 test/MC/Mips/set-oddspreg-nooddspreg.s             |   10 +
 test/MC/Mips/update-module-level-options.s         |   14 +
 test/MC/PowerPC/ppc64-encoding-vmx.s               |   10 +-
 .../X86/AlignedBundling/misaligned-bundle-group.s  |   23 +
 test/MC/X86/AlignedBundling/misaligned-bundle.s    |   31 +
 test/MC/X86/AlignedBundling/rodata-section.s       |   30 +
 test/MC/X86/avx512-encodings.s                     | 4778 +++++++++++++++++++-
 test/MC/X86/avx512vl-encoding.s                    |  880 ++++
 test/MC/X86/faultmap-section-parsing.s             |   29 +
 test/MC/X86/inline-asm-obj.ll                      |   13 +
 test/MC/X86/x86-64-avx512bw.s                      |  107 +
 test/MC/X86/x86-64-avx512bw_vl.s                   |   73 +
 test/MC/X86/x86-64-avx512f_vl.s                    | 4256 +++++++++++++++++
 test/Object/ARM/nm-mapping-symbol.s                |   11 +
 test/Object/Inputs/invalid-section-index.elf       |  Bin 0 -> 536 bytes
 test/Object/Inputs/invalid-section-size.elf        |  Bin 0 -> 584 bytes
 test/Object/Inputs/invalid-sh_entsize.elf          |  Bin 0 -> 1736 bytes
 test/Object/Inputs/invalid-strtab-non-null.elf     |  Bin 0 -> 536 bytes
 test/Object/Inputs/invalid-strtab-size.elf         |  Bin 0 -> 536 bytes
 test/Object/Inputs/invalid-strtab-type.elf         |  Bin 0 -> 536 bytes
 test/Object/Inputs/stackmap-test.macho-x86-64      |  Bin 0 -> 568 bytes
 test/Object/X86/nm-print-size.s                    |   12 +
 test/Object/dllimport-globalref.ll                 |   14 +
 test/Object/invalid.test                           |   44 +
 test/Object/lit.local.cfg                          |    2 +-
 test/Object/objdump-symbol-table.test              |    6 +-
 test/Object/relocation-executable.test             |   11 +
 test/Object/stackmap-dump.test                     |   16 +
 test/Transforms/GVN/br-identical.ll                |   38 +
 test/Transforms/GVN/pr12979.ll                     |   14 +
 test/Transforms/Inline/X86/inline-target-attr.ll   |   35 +
 test/Transforms/Inline/X86/lit.local.cfg           |    3 +
 test/Transforms/Inline/nonnull.ll                  |   45 +
 test/Transforms/InstCombine/pr23809.ll             |   22 +
 test/Transforms/InstCombine/select.ll              |   17 +
 test/Transforms/InstCombine/sub.ll                 |   22 +
 .../LoopDistribute/basic-with-memchecks.ll         |   12 +-
 test/Transforms/LoopDistribute/outside-use.ll      |    2 +-
 test/Transforms/LoopIdiom/basic.ll                 |    2 +-
 test/Transforms/LoopReroll/nonconst_lb.ll          |    4 +-
 test/Transforms/LoopSimplify/single-backedge.ll    |   38 +-
 .../LoopStrengthReduce/2011-10-06-ReusePhi.ll      |    4 +-
 .../LoopStrengthReduce/X86/ivchain-stress-X86.ll   |    2 +-
 .../LoopStrengthReduce/post-inc-icmpzero.ll        |    2 +-
 test/Transforms/LoopStrengthReduce/shl.ll          |   38 +
 test/Transforms/LoopUnroll/X86/mmx.ll              |   24 +
 .../Transforms/LoopUnswitch/2015-06-17-Metadata.ll |   77 +
 .../LoopVectorize/X86/ptr-indvar-crash.ll          |   20 +
 test/Transforms/LoopVectorize/optsize.ll           |   34 +
 test/Transforms/LoopVectorize/runtime-check.ll     |   45 +-
 test/Transforms/LowerBitSets/nonglobal.ll          |   19 +
 test/Transforms/NaryReassociate/NVPTX/nary-gep.ll  |   36 +
 test/Transforms/PruneEH/pr23971.ll                 |   21 +
 test/Transforms/Reassociate/basictest.ll           |    8 +-
 test/Transforms/Reassociate/wrap-flags.ll          |   11 +
 .../RewriteStatepointsForGC/live-vector.ll         |   44 +-
 test/Transforms/SCCP/crash.ll                      |    5 +
 test/Transforms/SafeStack/no-attr.ll               |    2 +
 .../StraightLineStrengthReduce/AMDGPU/pr23975.ll   |   20 +
 test/tools/gold/slp-vectorize.ll                   |    4 +-
 test/tools/llvm-cxxdump/X86/lit.local.cfg          |    2 +
 test/tools/llvm-cxxdump/X86/sym-size.s             |   47 +
 .../tools/llvm-objdump/X86/macho-symbol-table.test |   10 +-
 test/tools/llvm-symbolizer/Inputs/fat.c            |   15 +
 test/tools/llvm-symbolizer/Inputs/fat.o            |  Bin 0 -> 49444 bytes
 test/tools/llvm-symbolizer/fat.test                |   11 +
 tools/CMakeLists.txt                               |   13 +-
 tools/dsymutil/DebugMap.cpp                        |    8 +-
 tools/dsymutil/DwarfLinker.cpp                     |  115 +-
 tools/dsymutil/MachODebugMapParser.cpp             |   31 +-
 tools/lli/OrcLazyJIT.h                             |    4 +-
 tools/llvm-ar/llvm-ar.cpp                          |    2 +-
 tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp          |  212 +-
 tools/llvm-cov/CodeCoverage.cpp                    |   15 +-
 tools/llvm-cxxdump/llvm-cxxdump.cpp                |   34 +-
 tools/llvm-dwarfdump/llvm-dwarfdump.cpp            |   21 +-
 tools/llvm-nm/llvm-nm.cpp                          |  137 +-
 tools/llvm-objdump/CMakeLists.txt                  |    1 +
 tools/llvm-objdump/COFFDump.cpp                    |    8 +-
 tools/llvm-objdump/ELFDump.cpp                     |    6 +-
 tools/llvm-objdump/MachODump.cpp                   |  169 +-
 tools/llvm-objdump/llvm-objdump.cpp                |  231 +-
 tools/llvm-readobj/ARMEHABIPrinter.h               |   44 +-
 tools/llvm-readobj/ARMWinEHPrinter.cpp             |   40 +-
 tools/llvm-readobj/CMakeLists.txt                  |    1 -
 tools/llvm-readobj/COFFDumper.cpp                  |   59 +-
 tools/llvm-readobj/ELFDumper.cpp                   |  224 +-
 tools/llvm-readobj/MachODumper.cpp                 |   46 +-
 tools/llvm-readobj/ObjDumper.h                     |    3 +
 tools/llvm-readobj/StackMapPrinter.h               |   80 +
 tools/llvm-readobj/Win64EHDumper.cpp               |   20 +-
 tools/llvm-readobj/llvm-readobj.cpp                |   26 +-
 tools/llvm-rtdyld/llvm-rtdyld.cpp                  |   72 +-
 tools/llvm-stress/llvm-stress.cpp                  |   89 +-
 tools/llvm-symbolizer/LLVMSymbolize.cpp            |   45 +-
 tools/llvm-symbolizer/LLVMSymbolize.h              |    2 +-
 tools/lto/lto.cpp                                  |   16 +-
 tools/lto/lto.exports                              |    6 +-
 tools/obj2yaml/coff2yaml.cpp                       |    5 +-
 tools/obj2yaml/elf2yaml.cpp                        |   80 +-
 unittests/ADT/APIntTest.cpp                        |  166 +
 unittests/ADT/APSIntTest.cpp                       |   17 +
 unittests/ADT/DenseMapTest.cpp                     |   25 +
 unittests/ADT/TripleTest.cpp                       |   30 +
 unittests/AsmParser/AsmParserTest.cpp              |   20 +
 unittests/CodeGen/DIEHashTest.cpp                  |  481 +-
 unittests/ExecutionEngine/Orc/CMakeLists.txt       |    1 +
 .../Orc/ObjectTransformLayerTest.cpp               |  302 ++
 unittests/IR/IRBuilderTest.cpp                     |   35 +
 unittests/IR/MetadataTest.cpp                      |   47 +
 unittests/IR/ValueTest.cpp                         |   69 +
 unittests/Option/OptionParsingTest.cpp             |  137 +-
 unittests/ProfileData/InstrProfTest.cpp            |    6 +
 unittests/Transforms/Utils/Cloning.cpp             |   35 +
 utils/TableGen/CodeGenDAGPatterns.cpp              |    3 +-
 utils/TableGen/RegisterInfoEmitter.cpp             |   22 +
 utils/TableGen/X86RecognizableInstr.cpp            |    6 +
 utils/lit/lit/TestingConfig.py                     |    2 +-
 utils/release/test-release.sh                      |    9 +-
 1641 files changed, 48467 insertions(+), 17099 deletions(-)
 create mode 100644 include/llvm/Analysis/VectorUtils.h
 create mode 100644 include/llvm/AsmParser/SlotMapping.h
 create mode 100644 include/llvm/ExecutionEngine/Orc/NullResolver.h
 create mode 100644 include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
 create mode 100644 include/llvm/IR/ModuleSlotTracker.h
 create mode 100644 include/llvm/IR/Value.def
 create mode 100644 include/llvm/MC/MCParser/MCAsmParserUtils.h
 create mode 100644 include/llvm/Object/StackMapParser.h
 create mode 100644 include/llvm/Object/SymbolSize.h
 delete mode 100644 include/llvm/Transforms/Utils/VectorUtils.h
 create mode 100644 lib/Analysis/VectorUtils.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DebugLocStream.cpp
 create mode 100644 lib/CodeGen/InterleavedAccessPass.cpp
 create mode 100644 lib/CodeGen/MIRParser/MILexer.cpp
 create mode 100644 lib/CodeGen/MIRParser/MILexer.h
 create mode 100644 lib/CodeGen/MIRParser/MIParser.cpp
 create mode 100644 lib/CodeGen/MIRParser/MIParser.h
 create mode 100644 lib/ExecutionEngine/Orc/NullResolver.cpp
 create mode 100644 lib/Object/SymbolSize.cpp
 create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
 create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
 create mode 100644 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
 create mode 100644 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
 create mode 100644 lib/Target/AMDGPU/Utils/CMakeLists.txt
 create mode 100644 lib/Target/AMDGPU/Utils/LLVMBuild.txt
 create mode 100644 lib/Target/AMDGPU/Utils/Makefile
 create mode 100644 lib/Target/NVPTX/NVPTXPeephole.cpp
 create mode 100644 lib/Target/WebAssembly/CMakeLists.txt
 create mode 100644 lib/Target/WebAssembly/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt
 create mode 100644 lib/Target/WebAssembly/InstPrinter/Makefile
 create mode 100644 lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
 create mode 100644 lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
 create mode 100644 lib/Target/WebAssembly/LLVMBuild.txt
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/Makefile
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
 create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
 create mode 100644 lib/Target/WebAssembly/Makefile
 create mode 100644 lib/Target/WebAssembly/README.txt
 create mode 100644 lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
 create mode 100644 lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt
 create mode 100644 lib/Target/WebAssembly/TargetInfo/Makefile
 create mode 100644 lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssembly.h
 create mode 100644 lib/Target/WebAssembly/WebAssembly.td
 create mode 100644 lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyFrameLowering.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyISelLowering.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrFormats.td
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrInfo.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrInfo.td
 create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
 create mode 100644 lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
 create mode 100644 lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblySubtarget.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblySubtarget.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyTargetMachine.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
 create mode 100644 lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
 create mode 100644 lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
 create mode 100644 test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll
 create mode 100644 test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll
 create mode 100644 test/Assembler/dimodule.ll
 create mode 100644 test/Bindings/llvm-c/ARM/disassemble.test
 create mode 100644 test/Bindings/llvm-c/ARM/lit.local.cfg
 create mode 100644 test/Bindings/llvm-c/X86/disassemble.test
 create mode 100644 test/Bindings/llvm-c/X86/lit.local.cfg
 delete mode 100644 test/Bindings/llvm-c/disassemble.test
 delete mode 100644 test/Bindings/llvm-c/lit.local.cfg
 create mode 100644 test/Bitcode/Inputs/PR23310.bc
 create mode 100644 test/Bitcode/PR23310.test
 create mode 100644 test/CodeGen/AArch64/aarch-multipart.ll
 create mode 100644 test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
 create mode 100644 test/CodeGen/AMDGPU/commute-shifts.ll
 create mode 100644 test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
 create mode 100644 test/CodeGen/ARM/arm-interleaved-accesses.ll
 create mode 100644 test/CodeGen/ARM/fnattr-trap.ll
 create mode 100644 test/CodeGen/ARM/load-store-flags.ll
 create mode 100644 test/CodeGen/MIR/X86/expected-machine-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-number-after-bb.mir
 create mode 100644 test/CodeGen/MIR/X86/global-value-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/immediate-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/large-index-number-error.mir
 create mode 100644 test/CodeGen/MIR/X86/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/X86/machine-basic-block-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/machine-instructions.mir
 create mode 100644 test/CodeGen/MIR/X86/missing-comma.mir
 create mode 100644 test/CodeGen/MIR/X86/missing-instruction.mir
 create mode 100644 test/CodeGen/MIR/X86/named-registers.mir
 create mode 100644 test/CodeGen/MIR/X86/null-register-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/register-mask-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-global-value.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-named-global-value.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-instruction.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-machine-basic-block.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-register.mir
 create mode 100644 test/CodeGen/MIR/X86/unrecognized-character.mir
 create mode 100644 test/CodeGen/MIR/expected-eof-after-successor-mbb.mir
 create mode 100644 test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir
 create mode 100644 test/CodeGen/MIR/machine-basic-block-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/register-info.mir
 create mode 100644 test/CodeGen/MIR/successor-basic-blocks.mir
 create mode 100644 test/CodeGen/NVPTX/extloadv.ll
 create mode 100644 test/CodeGen/NVPTX/globals_lowering.ll
 create mode 100644 test/CodeGen/PowerPC/builtins-ppc-elf2-abi.ll
 create mode 100644 test/CodeGen/PowerPC/lxvw4x-bug.ll
 create mode 100644 test/CodeGen/PowerPC/swaps-le-3.ll
 create mode 100644 test/CodeGen/PowerPC/swaps-le-4.ll
 create mode 100644 test/CodeGen/PowerPC/vec_mergeow.ll
 create mode 100644 test/CodeGen/X86/asm-mismatched-types.ll
 delete mode 100644 test/CodeGen/X86/avx512-shuffle.ll
 create mode 100644 test/CodeGen/X86/coff-weak.ll
 create mode 100644 test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
 create mode 100644 test/CodeGen/X86/fma-intrinsics-x86.ll
 delete mode 100644 test/CodeGen/X86/fma-intrinsics-x86_64.ll
 delete mode 100755 test/CodeGen/X86/fma3-intrinsics.ll
 create mode 100644 test/CodeGen/X86/fold-load-binops.ll
 create mode 100644 test/CodeGen/X86/fold-vector-sext-crash2.ll
 create mode 100644 test/CodeGen/X86/fold-vector-shl-crash.ll
 create mode 100644 test/CodeGen/X86/machine-combiner.ll
 create mode 100644 test/CodeGen/X86/pr23900.ll
 create mode 100644 test/CodeGen/X86/rrlist-livereg-corrutpion.ll
 create mode 100644 test/CodeGen/X86/seh-filter-no-personality.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics.ll
 delete mode 100644 test/CodeGen/X86/vec_shift8.ll
 create mode 100644 test/CodeGen/X86/vector-shift-ashr-128.ll
 create mode 100644 test/CodeGen/X86/vector-shift-ashr-256.ll
 create mode 100644 test/CodeGen/X86/vector-shift-lshr-128.ll
 create mode 100644 test/CodeGen/X86/vector-shift-lshr-256.ll
 create mode 100644 test/CodeGen/X86/vector-shift-shl-128.ll
 create mode 100644 test/CodeGen/X86/vector-shift-shl-256.ll
 create mode 100644 test/DebugInfo/AArch64/bitfields.ll
 create mode 100644 test/DebugInfo/ARM/bitfield.ll
 create mode 100644 test/DebugInfo/X86/DIModule.ll
 create mode 100644 test/DebugInfo/X86/bitfields.ll
 create mode 100644 test/DebugInfo/X86/debug-info-packed-struct.ll
 create mode 100644 test/DebugInfo/X86/debug-loc-empty-entries.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll
 create mode 100644 test/Linker/comdat10.ll
 create mode 100644 test/MC/AArch64/alias-addsubimm.s
 create mode 100644 test/MC/AMDGPU/hsa.s
 create mode 100644 test/MC/AMDGPU/hsa_code_object_isa_noargs.s
 create mode 100644 test/MC/ARM/directive-type-diagnostics.s
 create mode 100644 test/MC/COFF/ARM/directive-type-diagnostics.s
 create mode 100644 test/MC/COFF/ARM/lit.local.cfg
 delete mode 100644 test/MC/Disassembler/Mips/mips2.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32r2.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-el.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-el.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-le.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32r6.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips64.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips64r2.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips64r6.txt
 create mode 100644 test/MC/ELF/many-sections-3.s
 create mode 100644 test/MC/ELF/relax-arith2.s
 create mode 100644 test/MC/ELF/relax-arith3.s
 create mode 100644 test/MC/ELF/symver-pr23914.s
 create mode 100644 test/MC/ELF/undef-temp.s
 delete mode 100644 test/MC/ELF/undef2.s
 create mode 100644 test/MC/MachO/ARM/directive-type-diagnostics.s
 create mode 100644 test/MC/Mips/micromips32r6/invalid.s
 create mode 100644 test/MC/Mips/mips-cop0-reginfo.s
 create mode 100644 test/MC/Mips/module-hardfloat.s
 create mode 100644 test/MC/Mips/module-softfloat.s
 create mode 100644 test/MC/Mips/set-oddspreg-nooddspreg-error.s
 create mode 100644 test/MC/Mips/set-oddspreg-nooddspreg.s
 create mode 100644 test/MC/Mips/update-module-level-options.s
 create mode 100644 test/MC/X86/AlignedBundling/misaligned-bundle-group.s
 create mode 100644 test/MC/X86/AlignedBundling/misaligned-bundle.s
 create mode 100644 test/MC/X86/AlignedBundling/rodata-section.s
 create mode 100644 test/MC/X86/faultmap-section-parsing.s
 create mode 100644 test/MC/X86/inline-asm-obj.ll
 create mode 100644 test/Object/ARM/nm-mapping-symbol.s
 create mode 100644 test/Object/Inputs/invalid-section-index.elf
 create mode 100644 test/Object/Inputs/invalid-section-size.elf
 create mode 100755 test/Object/Inputs/invalid-sh_entsize.elf
 create mode 100644 test/Object/Inputs/invalid-strtab-non-null.elf
 create mode 100644 test/Object/Inputs/invalid-strtab-size.elf
 create mode 100644 test/Object/Inputs/invalid-strtab-type.elf
 create mode 100644 test/Object/Inputs/stackmap-test.macho-x86-64
 create mode 100644 test/Object/X86/nm-print-size.s
 create mode 100644 test/Object/dllimport-globalref.ll
 create mode 100644 test/Object/stackmap-dump.test
 create mode 100644 test/Transforms/GVN/br-identical.ll
 create mode 100644 test/Transforms/Inline/X86/inline-target-attr.ll
 create mode 100644 test/Transforms/Inline/X86/lit.local.cfg
 create mode 100644 test/Transforms/Inline/nonnull.ll
 create mode 100644 test/Transforms/InstCombine/pr23809.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/shl.ll
 create mode 100644 test/Transforms/LoopUnroll/X86/mmx.ll
 create mode 100644 test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
 create mode 100644 test/Transforms/LoopVectorize/optsize.ll
 create mode 100644 test/Transforms/LowerBitSets/nonglobal.ll
 create mode 100644 test/Transforms/PruneEH/pr23971.ll
 create mode 100644 test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll
 create mode 100644 test/tools/llvm-cxxdump/X86/lit.local.cfg
 create mode 100644 test/tools/llvm-cxxdump/X86/sym-size.s
 create mode 100644 test/tools/llvm-symbolizer/Inputs/fat.c
 create mode 100644 test/tools/llvm-symbolizer/Inputs/fat.o
 create mode 100644 test/tools/llvm-symbolizer/fat.test
 create mode 100644 tools/llvm-readobj/StackMapPrinter.h
 create mode 100644 unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp

diff --git a/.gitignore b/.gitignore
index 02146ef..f3424d2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,9 +43,7 @@ autoconf/autom4te.cache
 # Directories to ignore (do not add trailing '/'s, they skip symlinks).
 #==============================================================================#
 # External projects that are tracked independently.
-projects/*
-!projects/CMakeLists.txt
-!projects/Makefile
+projects/*/
 # Clang, which is tracked independently.
 tools/clang
 # LLDB, which is tracked independently.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index da73149..3194197 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -530,7 +530,7 @@ endif()
 # check its symbols. This is wasteful (the check was done when foo.so
 # was created) and can fail since it is not the dynamic linker and
 # doesn't know how to handle search paths correctly.
-if (UNIX AND NOT APPLE)
+if (UNIX AND NOT APPLE AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
   set(CMAKE_EXE_LINKER_FLAGS
       "${CMAKE_EXE_LINKER_FLAGS} -Wl,-allow-shlib-undefined")
 endif()
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index 25d9d38..a15f291 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -65,6 +65,10 @@ N: Hal Finkel
 E: hfinkel@anl.gov
 D: BBVectorize, the loop reroller, alias analysis and the PowerPC target
 
+N: Dan Gohman
+E: sunfish@mozilla.com
+D: WebAssembly Backend (lib/Target/WebAssembly/*)
+
 N: Renato Golin
 E: renato.golin@linaro.org
 D: ARM Linux support
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 40d67f4..da1fb01 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -152,8 +152,9 @@ E: foldr@codedgers.com
 D: Author of llvmc2
 
 N: Dan Gohman
-E: dan433584@gmail.com
+E: sunfish@mozilla.com
 D: Miscellaneous bug fixes
+D: WebAssembly Backend
 
 N: David Goodwin
 E: david@goodwinz.net
diff --git a/Makefile.config.in b/Makefile.config.in
index 7af5d3c..3258714 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -58,20 +58,9 @@ LLVM_OBJ_ROOT   := $(call realpath, @abs_top_builddir@)
 PROJ_SRC_ROOT   := $(LLVM_SRC_ROOT)
 PROJ_SRC_DIR    := $(LLVM_SRC_ROOT)$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR))
 
-# FIXME: This is temporary during the grace period where in-source builds are
-# deprecated. Convert to a hard error when that period is up.
-#
 # See: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20150323/268067.html
 ifeq ($(LLVM_SRC_ROOT), $(LLVM_OBJ_ROOT))
-  $(warning ######################################################################################)
-  $(warning #                                                                                    #)
-  $(warning #                                   WARNING                                          #)
-  $(warning #                                                                                    #)
-  $(warning #                        In-source builds are deprecated.                            #)
-  $(warning #                                                                                    #)
-  $(warning #               Please configure from a separate build directory!                    #)
-  $(warning #                                                                                    #)
-  $(warning ######################################################################################)
+  $(error In-source builds are not allowed. Please configure from a separate build directory!)
 endif
 
 ifneq ($(CLANG_SRC_ROOT),)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 0942c8e..d6778ac 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -75,15 +75,7 @@ fi
 
 dnl Quit if it is an in-source build
 if test ${srcdir} == "." ; then
-  AC_MSG_WARN([**************************************************************************************])
-  AC_MSG_WARN([*                                                                                    *])
-  AC_MSG_WARN([*                                   WARNING                                          *])
-  AC_MSG_WARN([*                                                                                    *])
-  AC_MSG_WARN([*                        In-source builds are deprecated.                            *])
-  AC_MSG_WARN([*                                                                                    *])
-  AC_MSG_WARN([*               Please configure from a separate build directory!                    *])
-  AC_MSG_WARN([*                                                                                    *])
-  AC_MSG_WARN([**************************************************************************************])
+  AC_MSG_ERROR([In-source builds are not allowed. Please configure from a separate build directory!])
 fi
 
 dnl Default to empty (i.e. assigning the null string to) CFLAGS and CXXFLAGS,
@@ -445,6 +437,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
   nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
   s390x-*)                llvm_cv_target_arch="SystemZ" ;;
+  wasm*-*)                llvm_cv_target_arch="WebAssembly" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac])
 
@@ -480,6 +473,7 @@ case $host in
   msp430-*)               host_arch="MSP430" ;;
   hexagon-*)              host_arch="Hexagon" ;;
   s390x-*)                host_arch="SystemZ" ;;
+  wasm*-*)                host_arch="WebAssembly" ;;
   *)                      host_arch="Unknown" ;;
 esac
 
@@ -812,6 +806,7 @@ else
     Hexagon)     AC_SUBST(TARGET_HAS_JIT,0) ;;
     NVPTX)       AC_SUBST(TARGET_HAS_JIT,0) ;;
     SystemZ)     AC_SUBST(TARGET_HAS_JIT,1) ;;
+    WebAssembly) AC_SUBST(TARGET_HAS_JIT,0) ;;
     *)           AC_SUBST(TARGET_HAS_JIT,0) ;;
   esac
 fi
@@ -1105,7 +1100,7 @@ TARGETS_TO_BUILD=""
 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
     [Build specific host targets: all or target1,target2,... Valid targets are:
      host, x86, x86_64, sparc, powerpc, arm64, arm, aarch64, mips, hexagon,
-     xcore, msp430, nvptx, systemz, r600, bpf, and cpp (default=all)]),,
+     xcore, msp430, nvptx, systemz, r600, bpf, wasm, and cpp (default=all)]),,
     enableval=all)
 if test "$enableval" = host-only ; then
   enableval=host
@@ -1134,6 +1129,7 @@ case "$enableval" in
         systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
         amdgpu)   TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;;
         r600)     TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;;
+        wasm)     TARGETS_TO_BUILD="WebAssembly $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -1147,6 +1143,7 @@ case "$enableval" in
             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
             NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
             SystemZ)     TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+            WebAssembly) TARGETS_TO_BUILD="WebAssembly $TARGETS_TO_BUILD" ;;
             *)       AC_MSG_ERROR([Can not set target to build]) ;;
           esac ;;
         *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
@@ -1631,7 +1628,6 @@ dnl===-----------------------------------------------------------------------===
 
 AC_CHECK_LIB(m,sin)
 if test "$llvm_cv_os_type" = "MingW" ; then
-  AC_CHECK_LIB(imagehlp, main)
   AC_CHECK_LIB(ole32, main)
   AC_CHECK_LIB(psapi, main)
   AC_CHECK_LIB(shell32, main)
diff --git a/bindings/ocaml/executionengine/llvm_executionengine.ml b/bindings/ocaml/executionengine/llvm_executionengine.ml
index 34031be..3f37e0c 100644
--- a/bindings/ocaml/executionengine/llvm_executionengine.ml
+++ b/bindings/ocaml/executionengine/llvm_executionengine.ml
@@ -43,11 +43,11 @@ external run_static_dtors : llexecutionengine -> unit
   = "llvm_ee_run_static_dtors"
 external data_layout : llexecutionengine -> Llvm_target.DataLayout.t
   = "llvm_ee_get_data_layout"
-external add_global_mapping_ : Llvm.llvalue -> int64 -> llexecutionengine -> unit
+external add_global_mapping_ : Llvm.llvalue -> nativeint -> llexecutionengine -> unit
   = "llvm_ee_add_global_mapping"
-external get_global_value_address_ : string -> llexecutionengine -> int64
+external get_global_value_address_ : string -> llexecutionengine -> nativeint
   = "llvm_ee_get_global_value_address"
-external get_function_address_ : string -> llexecutionengine -> int64
+external get_function_address_ : string -> llexecutionengine -> nativeint
   = "llvm_ee_get_function_address"
 
 let add_global_mapping llval ptr ee =
@@ -55,14 +55,14 @@ let add_global_mapping llval ptr ee =
 
 let get_global_value_address name typ ee =
   let vptr = get_global_value_address_ name ee in
-  if Int64.to_int vptr <> 0 then
+  if Nativeint.to_int vptr <> 0 then
     let open Ctypes in !@ (coerce (ptr void) (ptr typ) (ptr_of_raw_address vptr))
   else
     raise (Error ("Value " ^ name ^ " not found"))
 
 let get_function_address name typ ee =
   let fptr = get_function_address_ name ee in
-  if Int64.to_int fptr <> 0 then
+  if Nativeint.to_int fptr <> 0 then
     let open Ctypes in coerce (ptr void) typ (ptr_of_raw_address fptr)
   else
     raise (Error ("Function " ^ name ^ " not found"))
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 4e22aab..3203d1e 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -358,6 +358,10 @@ elseif (LLVM_NATIVE_ARCH MATCHES "hexagon")
   set(LLVM_NATIVE_ARCH Hexagon)
 elseif (LLVM_NATIVE_ARCH MATCHES "s390x")
   set(LLVM_NATIVE_ARCH SystemZ)
+elseif (LLVM_NATIVE_ARCH MATCHES "wasm32")
+  set(LLVM_NATIVE_ARCH WebAssembly)
+elseif (LLVM_NATIVE_ARCH MATCHES "wasm64")
+  set(LLVM_NATIVE_ARCH WebAssembly)
 else ()
   message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}")
 endif ()
@@ -393,12 +397,10 @@ else ()
 endif ()
 
 if( MINGW )
-  set(HAVE_LIBIMAGEHLP 1)
   set(HAVE_LIBPSAPI 1)
   set(HAVE_LIBSHELL32 1)
   # TODO: Check existence of libraries.
   #   include(CheckLibraryExists)
-  #   CHECK_LIBRARY_EXISTS(imagehlp ??? . HAVE_LIBIMAGEHLP)
 endif( MINGW )
 
 if (NOT HAVE_STRTOLL)
@@ -546,13 +548,13 @@ else()
     if( OCAML_VERSION VERSION_LESS "4.00.0" )
       message(STATUS "OCaml bindings disabled, need OCaml >=4.00.0.")
     else()
-      find_ocamlfind_package(ctypes VERSION 0.3 OPTIONAL)
+      find_ocamlfind_package(ctypes VERSION 0.4 OPTIONAL)
       if( HAVE_OCAML_CTYPES )
         message(STATUS "OCaml bindings enabled.")
         find_ocamlfind_package(oUnit VERSION 2 OPTIONAL)
         set(LLVM_BINDINGS "${LLVM_BINDINGS} ocaml")
       else()
-        message(STATUS "OCaml bindings disabled, need ctypes >=0.3.")
+        message(STATUS "OCaml bindings disabled, need ctypes >=0.4.")
       endif()
     endif()
   endif()
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index d80fcd7..4f60d9e 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -83,8 +83,13 @@ function(add_llvm_symbol_exports target_name export_file)
       DEPENDS ${export_file}
       VERBATIM
       COMMENT "Creating export file for ${target_name}")
-    set_property(TARGET ${target_name} APPEND_STRING PROPERTY
-                 LINK_FLAGS "  -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}")
+    if (${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
+      set_property(TARGET ${target_name} APPEND_STRING PROPERTY
+                   LINK_FLAGS "  -Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}")
+    else()
+      set_property(TARGET ${target_name} APPEND_STRING PROPERTY
+                   LINK_FLAGS "  -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}")
+    endif()
   else()
     set(native_export_file "${target_name}.def")
 
@@ -163,7 +168,7 @@ function(add_link_opts target_name)
 
     # Pass -O3 to the linker. This enabled different optimizations on different
     # linkers.
-    if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32))
+    if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin|SunOS" OR WIN32))
       set_property(TARGET ${target_name} APPEND_STRING PROPERTY
                    LINK_FLAGS " -Wl,-O3")
     endif()
@@ -181,6 +186,9 @@ function(add_link_opts target_name)
         # ld64's implementation of -dead_strip breaks tools that use plugins.
         set_property(TARGET ${target_name} APPEND_STRING PROPERTY
                      LINK_FLAGS " -Wl,-dead_strip")
+      elseif(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
+        set_property(TARGET ${target_name} APPEND_STRING PROPERTY
+                     LINK_FLAGS " -Wl,-z -Wl,discard-unused=sections")
       elseif(NOT WIN32 AND NOT LLVM_LINKER_IS_GOLD)
         # Object files are compiled with -ffunction-data-sections.
         # Versions of bfd ld < 2.23.1 have a bug in --gc-sections that breaks
@@ -495,11 +503,17 @@ macro(add_llvm_library name)
   else()
     llvm_add_library(${name} ${ARGN})
   endif()
-  set_property( GLOBAL APPEND PROPERTY LLVM_LIBS ${name} )
+  # The gtest libraries should not be installed or exported as a target
+  if ("${name}" STREQUAL gtest OR "${name}" STREQUAL gtest_main)
+    set(_is_gtest TRUE)
+  else()
+    set(_is_gtest FALSE)
+    set_property( GLOBAL APPEND PROPERTY LLVM_LIBS ${name} )
+  endif()
 
   if( EXCLUDE_FROM_ALL )
     set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON)
-  else()
+  elseif(NOT _is_gtest)
     if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "LTO")
       if(ARG_SHARED OR BUILD_SHARED_LIBS)
         if(WIN32 OR CYGWIN)
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index a936894..9f5a3a0 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -308,9 +308,9 @@ if( MSVC )
     -wd4805 # Suppress 'unsafe mix of type <type> and type <type> in operation'
     -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer'
 
-	# Idelly, we'd like this warning to be enabled, but MSVC 2013 doesn't
+	# Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't
 	# support the 'aligned' attribute in the way that clang sources requires (for
-	# any code that uses the LLVM_ALIGNAS marco), so this is must be disabled to
+	# any code that uses the LLVM_ALIGNAS macro), so this is must be disabled to
 	# avoid unwanted alignment warnings.
 	# When we switch to requiring a version of MSVC that supports the 'alignas'
 	# specifier (MSVC 2015?) this warning can be re-enabled.
diff --git a/cmake/modules/Makefile b/cmake/modules/Makefile
index 97ee7d3..f644c45 100644
--- a/cmake/modules/Makefile
+++ b/cmake/modules/Makefile
@@ -47,6 +47,12 @@ ifeq ($(LLVM_LIBS_TO_EXPORT),Error)
 $(error llvm-config --libs failed)
 endif
 
+# Strip out gtest and gtest_main from LLVM_LIBS_TO_EXPORT, these are not
+# installed and won't be available from the install tree.
+# FIXME: If we used llvm-config from the install tree this wouldn't be
+# necessary.
+LLVM_LIBS_TO_EXPORT := $(filter-out gtest gtest_main,$(LLVM_LIBS_TO_EXPORT))
+
 ifndef LLVM_LIBS_TO_EXPORT
 $(error LLVM_LIBS_TO_EXPORT cannot be empty)
 endif
@@ -54,11 +60,27 @@ endif
 
 OBJMODS := LLVMConfig.cmake LLVMConfigVersion.cmake LLVMExports.cmake
 
+LLVM_CONFIG_CODE := \
+\# Compute the CMake directory from the LLVMConfig.cmake file location.\n\
+get_filename_component(_LLVM_CMAKE_DIR "$${CMAKE_CURRENT_LIST_FILE}" PATH)\n\
+\# Compute the installation prefix from the LLVMConfig.cmake file location.\n\
+get_filename_component(LLVM_INSTALL_PREFIX "$${CMAKE_CURRENT_LIST_FILE}" PATH)\n
+
+# Compute number of levels (typically 3 - ``share/llvm/cmake/``) to PROJ_prefix
+# from PROJ_cmake, then emit the appropriate number of calls to
+# get_filename_components(). Note this assumes there are no spaces in the
+# cmake_path_suffix variable.
+cmake_path_suffix := $(subst $(PROJ_prefix),,$(subst $(DESTDIR),,$(PROJ_cmake)))
+cmake_path_dirs := $(subst /, ,$(cmake_path_suffix))
+LLVM_CONFIG_CODE += $(foreach __not_used,$(cmake_path_dirs),get_filename_component(LLVM_INSTALL_PREFIX "$${LLVM_INSTALL_PREFIX}" PATH)\n)
+
+LLVM_CONFIG_CODE += set(_LLVM_LIBRARY_DIR "$${LLVM_INSTALL_PREFIX}\/lib")
+
 $(PROJ_OBJ_DIR)/LLVMConfig.cmake: LLVMConfig.cmake.in Makefile $(LLVMBuildCMakeFrag)
 	$(Echo) 'Generating LLVM CMake package config file'
 	$(Verb) ( \
 	 cat $< | sed \
-	  -e 's/@LLVM_CONFIG_CODE@/set(LLVM_INSTALL_PREFIX "'"$(subst /,\/,$(PROJ_prefix))"'")/' \
+	  -e 's/@LLVM_CONFIG_CODE@/$(LLVM_CONFIG_CODE)/' \
 	  -e 's/@LLVM_VERSION_MAJOR@/'"$(LLVM_VERSION_MAJOR)"'/' \
 	  -e 's/@LLVM_VERSION_MINOR@/'"$(LLVM_VERSION_MINOR)"'/' \
 	  -e 's/@LLVM_VERSION_PATCH@/'"$(LLVM_VERSION_PATCH)"'/' \
@@ -81,17 +103,20 @@ $(PROJ_OBJ_DIR)/LLVMConfig.cmake: LLVMConfig.cmake.in Makefile $(LLVMBuildCMakeF
 	  -e 's/@LLVM_ON_UNIX@/'"$(LLVM_ON_UNIX)"'/' \
 	  -e 's/@LLVM_ON_WIN32@/'"$(LLVM_ON_WIN32)"'/' \
 	  -e 's/@LLVM_LIBDIR_SUFFIX@//' \
-	  -e 's/@LLVM_CONFIG_INCLUDE_DIRS@/'"$(subst /,\/,$(PROJ_includedir))"'/' \
-	  -e 's/@LLVM_CONFIG_LIBRARY_DIRS@/'"$(subst /,\/,$(PROJ_libdir))"'/' \
-	  -e 's/@LLVM_CONFIG_CMAKE_DIR@/'"$(subst /,\/,$(PROJ_cmake))"'/' \
-	  -e 's/@LLVM_CONFIG_TOOLS_BINARY_DIR@/'"$(subst /,\/,$(PROJ_bindir))"'/' \
+	  -e 's#@LLVM_CONFIG_INCLUDE_DIRS@#$${LLVM_INSTALL_PREFIX}/include#' \
+	  -e 's#@LLVM_CONFIG_LIBRARY_DIRS@#$${_LLVM_LIBRARY_DIR}#' \
+	  -e 's#@LLVM_CONFIG_CMAKE_DIR@#$${_LLVM_CMAKE_DIR}#' \
+	  -e 's#@LLVM_CONFIG_TOOLS_BINARY_DIR@#$${LLVM_INSTALL_PREFIX}/bin#' \
 	  -e 's/@LLVM_CONFIG_EXPORTS_FILE@/$${LLVM_CMAKE_DIR}\/LLVMExports.cmake/' \
 	  -e 's/@all_llvm_lib_deps@//' \
 	 && \
-	 grep '^set_property.*LLVMBUILD_LIB_DEPS_' "$(LLVMBuildCMakeFrag)" \
+	 ( grep '^set_property.*LLVMBUILD_LIB_DEPS_' "$(LLVMBuildCMakeFrag)" | \
+	   grep -v LLVMBUILD_LIB_DEPS_gtest ) && \
+	 echo 'unset(_LLVM_CMAKE_DIR)' && \
+	 echo 'unset(_LLVM_LIBRARY_DIR)' \
 	) > $@
 
-$(PROJ_OBJ_DIR)/LLVMConfigVersion.cmake: LLVMConfigVersion.cmake.in
+$(PROJ_OBJ_DIR)/LLVMConfigVersion.cmake: LLVMConfigVersion.cmake.in Makefile
 	$(Echo) 'Generating LLVM CMake package version file'
 	$(Verb) cat $< | sed \
 	  -e 's/@PACKAGE_VERSION@/'"$(LLVMVersion)"'/' \
@@ -100,13 +125,13 @@ $(PROJ_OBJ_DIR)/LLVMConfigVersion.cmake: LLVMConfigVersion.cmake.in
 	  -e 's/@LLVM_VERSION_PATCH@/'"$(LLVM_VERSION_PATCH)"'/' \
 	  > $@
 
-$(PROJ_OBJ_DIR)/LLVMExports.cmake: $(LLVMBuildCMakeExportsFrag)
+$(PROJ_OBJ_DIR)/LLVMExports.cmake: $(LLVMBuildCMakeExportsFrag) Makefile
 	$(Echo) 'Generating LLVM CMake target exports file'
 	$(Verb) ( \
 	  echo '# LLVM CMake target exports.  Do not include directly.' && \
 	  for lib in $(LLVM_LIBS_TO_EXPORT); do \
 	    echo 'add_library('"$$lib"' STATIC IMPORTED)' && \
-	    echo 'set_property(TARGET '"$$lib"' PROPERTY IMPORTED_LOCATION "'"$(PROJ_libdir)/lib$$lib.a"'")' ; \
+	    echo 'set_property(TARGET '"$$lib"' PROPERTY IMPORTED_LOCATION "$${_LLVM_LIBRARY_DIR}/'lib$$lib.a'")' ; \
 	  done && \
 	  cat "$(LLVMBuildCMakeExportsFrag)" && \
 	  echo 'set_property(TARGET LLVMSupport APPEND PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES '"$(subst -l,,$(LIBS))"')' \
diff --git a/configure b/configure
index b09045e..a5acfde 100755
--- a/configure
+++ b/configure
@@ -1463,7 +1463,7 @@ Optional Features:
                           target1,target2,... Valid targets are: host, x86,
                           x86_64, sparc, powerpc, arm64, arm, aarch64, mips,
                           hexagon, xcore, msp430, nvptx, systemz, r600, bpf,
-                          and cpp (default=all)
+                          wasm, and cpp (default=all)
   --enable-experimental-targets
                           Build experimental host targets: disable or
                           target1,target2,... (default=disable)
@@ -2033,24 +2033,9 @@ echo "$as_me: error: Already configured in ${srcdir}" >&2;}
 fi
 
 if test ${srcdir} == "." ; then
-  { echo "$as_me:$LINENO: WARNING: **************************************************************************************" >&5
-echo "$as_me: WARNING: **************************************************************************************" >&2;}
-  { echo "$as_me:$LINENO: WARNING: *                                                                                    *" >&5
-echo "$as_me: WARNING: *                                                                                    *" >&2;}
-  { echo "$as_me:$LINENO: WARNING: *                                   WARNING                                          *" >&5
-echo "$as_me: WARNING: *                                   WARNING                                          *" >&2;}
-  { echo "$as_me:$LINENO: WARNING: *                                                                                    *" >&5
-echo "$as_me: WARNING: *                                                                                    *" >&2;}
-  { echo "$as_me:$LINENO: WARNING: *                        In-source builds are deprecated.                            *" >&5
-echo "$as_me: WARNING: *                        In-source builds are deprecated.                            *" >&2;}
-  { echo "$as_me:$LINENO: WARNING: *                                                                                    *" >&5
-echo "$as_me: WARNING: *                                                                                    *" >&2;}
-  { echo "$as_me:$LINENO: WARNING: *               Please configure from a separate build directory!                    *" >&5
-echo "$as_me: WARNING: *               Please configure from a separate build directory!                    *" >&2;}
-  { echo "$as_me:$LINENO: WARNING: *                                                                                    *" >&5
-echo "$as_me: WARNING: *                                                                                    *" >&2;}
-  { echo "$as_me:$LINENO: WARNING: **************************************************************************************" >&5
-echo "$as_me: WARNING: **************************************************************************************" >&2;}
+  { { echo "$as_me:$LINENO: error: In-source builds are not allowed. Please configure from a separate build directory!" >&5
+echo "$as_me: error: In-source builds are not allowed. Please configure from a separate build directory!" >&2;}
+   { (exit 1); exit 1; }; }
 fi
 
 : ${CFLAGS=}
@@ -4207,6 +4192,7 @@ else
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
   nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
   s390x-*)                llvm_cv_target_arch="SystemZ" ;;
+  wasm*-*)                llvm_cv_target_arch="WebAssembly" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac
 fi
@@ -4243,6 +4229,7 @@ case $host in
   msp430-*)               host_arch="MSP430" ;;
   hexagon-*)              host_arch="Hexagon" ;;
   s390x-*)                host_arch="SystemZ" ;;
+  wasm*-*)                host_arch="WebAssembly" ;;
   *)                      host_arch="Unknown" ;;
 esac
 
@@ -5170,6 +5157,8 @@ else
  ;;
     SystemZ)     TARGET_HAS_JIT=1
  ;;
+    WebAssembly) TARGET_HAS_JIT=0
+ ;;
     *)           TARGET_HAS_JIT=0
  ;;
   esac
@@ -5667,6 +5656,7 @@ case "$enableval" in
         systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
         amdgpu)   TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;;
         r600)     TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;;
+        wasm)     TARGETS_TO_BUILD="WebAssembly $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5680,6 +5670,7 @@ case "$enableval" in
             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
             NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
             SystemZ)     TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+            WebAssembly) TARGETS_TO_BUILD="WebAssembly $TARGETS_TO_BUILD" ;;
             *)       { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
 echo "$as_me: error: Can not set target to build" >&2;}
    { (exit 1); exit 1; }; } ;;
diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst
index 3cb41ce..97d6662 100644
--- a/docs/AMDGPUUsage.rst
+++ b/docs/AMDGPUUsage.rst
@@ -92,3 +92,86 @@ strings:
    v_mul_i32_i24 v1, v2, v3
    v_mul_i32_i24_e32 v1, v2, v3
    v_mul_i32_i24_e64 v1, v2, v3
+
+Assembler Directives
+--------------------
+
+.hsa_code_object_version major, minor
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+*major* and *minor* are integers that specify the version of the HSA code
+object that will be generated by the assembler.  This value will be stored
+in an entry of the .note section.
+
+.hsa_code_object_isa [major, minor, stepping, vendor, arch]
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+*major*, *minor*, and *stepping* are all integers that describe the instruction
+set architecture (ISA) version of the assembly program.
+
+*vendor* and *arch* are quoted strings.  *vendor* should always be equal to
+"AMD" and *arch* should always be equal to "AMDGPU".
+
+If no arguments are specified, then the assembler will derive the ISA version,
+*vendor*, and *arch* from the value of the -mcpu option that is passed to the
+assembler.
+
+ISA version, *vendor*, and *arch* will all be stored in a single entry of the
+.note section.
+
+.amd_kernel_code_t
+^^^^^^^^^^^^^^^^^^
+
+This directive marks the beginning of a list of key / value pairs that are used
+to specify the amd_kernel_code_t object that will be emitted by the assembler.
+The list must be terminated by the *.end_amd_kernel_code_t* directive.  For
+any amd_kernel_code_t values that are unspecified a default value will be
+used.  The default value for all keys is 0, with the following exceptions:
+
+- *kernel_code_version_major* defaults to 1.
+- *machine_kind* defaults to 1.
+- *machine_version_major*, *machine_version_minor*, and
+  *machine_version_stepping* are derived from the value of the -mcpu option
+  that is passed to the assembler.
+- *kernel_code_entry_byte_offset* defaults to 256.
+- *wavefront_size* defaults to 6.
+- *kernarg_segment_alignment*, *group_segment_alignment*, and
+  *private_segment_alignment* default to 4.  Note that alignments are specified
+  as a power of two, so a value of **n** means an alignment of 2^ **n**.
+
+The *.amd_kernel_code_t* directive must be placed immediately after the
+function label and before any instructions.
+
+For a full list of amd_kernel_code_t keys, see the examples in
+test/CodeGen/AMDGPU/hsa.s.  For an explanation of the meanings of the different
+keys, see the comments in lib/Target/AMDGPU/AmdKernelCodeT.h
+
+Here is an example of a minimal amd_kernel_code_t specification:
+
+.. code-block:: nasm
+
+   .hsa_code_object_version 1,0
+   .hsa_code_object_isa
+
+   .text
+
+   hello_world:
+
+      .amd_kernel_code_t
+         enable_sgpr_kernarg_segment_ptr = 1
+         is_ptr64 = 1
+         compute_pgm_rsrc1_vgprs = 0
+         compute_pgm_rsrc1_sgprs = 0
+         compute_pgm_rsrc2_user_sgpr = 2
+         kernarg_segment_byte_size = 8
+         wavefront_sgpr_count = 2
+         workitem_vgpr_count = 3
+     .end_amd_kernel_code_t
+
+     s_load_dwordx2 s[0:1], s[0:1] 0x0
+     v_mov_b32 v0, 3.14159
+     s_waitcnt lgkmcnt(0)
+     v_mov_b32 v1, s0
+     v_mov_b32 v2, s1
+     flat_store_dword v0, v[1:2]
+     s_endpgm
diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst
index 1cbaee7..f62cc3f 100644
--- a/docs/AliasAnalysis.rst
+++ b/docs/AliasAnalysis.rst
@@ -286,8 +286,8 @@ Mod/Ref result, simply return whatever the superclass computes.  For example:
 
 .. code-block:: c++
 
-  AliasAnalysis::AliasResult alias(const Value *V1, unsigned V1Size,
-                                   const Value *V2, unsigned V2Size) {
+  AliasResult alias(const Value *V1, unsigned V1Size,
+                    const Value *V2, unsigned V2Size) {
     if (...)
       return NoAlias;
     ...
diff --git a/docs/CMake.rst b/docs/CMake.rst
index b9e473f..b6dd838 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -327,8 +327,8 @@ LLVM-specific variables
 
 **LLVM_USE_SANITIZER**:STRING
   Define the sanitizer used to build LLVM binaries and tests. Possible values
-  are ``Address``, ``Memory``, ``MemoryWithOrigins`` and ``Undefined``.
-  Defaults to empty string.
+  are ``Address``, ``Memory``, ``MemoryWithOrigins``, ``Undefined``, ``Thread``,
+  and ``Address;Undefined``. Defaults to empty string.
 
 **LLVM_PARALLEL_COMPILE_JOBS**:STRING
   Define the maximum number of concurrent compilation jobs.
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 75d40db..516031d 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -749,7 +749,7 @@ The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the
 ``SDNode`` class.  The primary payload of the ``SDNode`` is its operation code
 (Opcode) that indicates what operation the node performs and the operands to the
 operation.  The various operation node types are described at the top of the
-``include/llvm/CodeGen/SelectionDAGNodes.h`` file.
+``include/llvm/CodeGen/ISDOpcodes.h`` file.
 
 Although most operations define a single value, each node in the graph may
 define multiple values.  For example, a combined div/rem operation will define
@@ -829,7 +829,7 @@ One great way to visualize what is going on here is to take advantage of a few
 LLC command line options.  The following options pop up a window displaying the
 SelectionDAG at specific times (if you only get errors printed to the console
 while using this, you probably `need to configure your
-system <ProgrammersManual.html#ViewGraph>`_ to add support for it).
+system <ProgrammersManual.html#viewing-graphs-while-debugging-code>`_ to add support for it).
 
 * ``-view-dag-combine1-dags`` displays the DAG after being built, before the
   first optimization pass.
diff --git a/docs/CommandGuide/llvm-dwarfdump.rst b/docs/CommandGuide/llvm-dwarfdump.rst
index afaa0be..30c18ad 100644
--- a/docs/CommandGuide/llvm-dwarfdump.rst
+++ b/docs/CommandGuide/llvm-dwarfdump.rst
@@ -26,5 +26,5 @@ OPTIONS
 EXIT STATUS
 -----------
 
-:program:`llvm-dwarfdump` returns 0. Other exit codes imply internal
-program error.
+:program:`llvm-dwarfdump` returns 0 if the input files were parsed and dumped
+successfully. Otherwise, it returns 1.
diff --git a/docs/FaultMaps.rst b/docs/FaultMaps.rst
index 692cacf..4ecdd86 100644
--- a/docs/FaultMaps.rst
+++ b/docs/FaultMaps.rst
@@ -49,6 +49,79 @@ The format of this section is
     FunctionFaultInfo[NumFaultingPCs] {
       uint32  : FaultKind = FaultMaps::FaultingLoad (only legal value currently)
       uint32  : FaultingPCOffset
-      uint32  : handlerPCOffset
+      uint32  : HandlerPCOffset
     }
   }
+
+
+The ``ImplicitNullChecks`` pass
+===============================
+
+The ``ImplicitNullChecks`` pass transforms explicit control flow for
+checking if a pointer is ``null``, like:
+
+.. code-block:: llvm
+
+    %ptr = call i32* @get_ptr()
+    %ptr_is_null = icmp i32* %ptr, null
+    br i1 %ptr_is_null, label %is_null, label %not_null, !make.implicit !0
+  
+  not_null:
+    %t = load i32, i32* %ptr
+    br label %do_something_with_t
+    
+  is_null:
+    call void @HFC()
+    unreachable
+  
+  !0 = !{}
+
+to control flow implicit in the instruction loading or storing through
+the pointer being null checked:
+
+.. code-block:: llvm
+
+    %ptr = call i32* @get_ptr()
+    %t = load i32, i32* %ptr  ;; handler-pc = label %is_null
+    br label %do_something_with_t
+    
+  is_null:
+    call void @HFC()
+    unreachable
+
+This transform happens at the ``MachineInstr`` level, not the LLVM IR
+level (so the above example is only representative, not literal).  The
+``ImplicitNullChecks`` pass runs during codegen, if
+``-enable-implicit-null-checks`` is passed to ``llc``.
+
+The ``ImplicitNullChecks`` pass adds entries to the
+``__llvm_faultmaps`` section described above as needed.
+
+``make.implicit`` metadata
+--------------------------
+
+Making null checks implicit is an aggressive optimization, and it can
+be a net performance pessimization if too many memory operations end
+up faulting because of it.  A language runtime typically needs to
+ensure that only a negligible number of implicit null checks actually
+fault once the application has reached a steady state.  A standard way
+of doing this is by healing failed implicit null checks into explicit
+null checks via code patching or recompilation.  It follows that there
+are two requirements an explicit null check needs to satisfy for it to
+be profitable to convert it to an implicit null check:
+
+  1. The case where the pointer is actually null (i.e. the "failing"
+     case) is extremely rare.
+
+  2. The failing path heals the implicit null check into an explicit
+     null check so that the application does not repeatedly page
+     fault.
+
+The frontend is expected to mark branches that satisfy (1) and (2)
+using a ``!make.implicit`` metadata node (the actual content of the
+metadata node is ignored).  Only branches that are marked with
+``!make.implicit`` metadata are considered as candidates for
+conversion into implicit null checks.
+
+(Note that while we could deal with (1) using profiling data, dealing
+with (2) requires some information not present in branch profiles.)
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index 212fa0b..75f0e60 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -326,7 +326,11 @@ Easy steps for installing GCC 4.8.2:
 
 .. code-block:: console
 
-  % wget ftp://ftp.gnu.org/gnu/gcc/gcc-4.8.2/gcc-4.8.2.tar.bz2
+  % wget https://ftp.gnu.org/gnu/gcc/gcc-4.8.2/gcc-4.8.2.tar.bz2
+  % wget https://ftp.gnu.org/gnu/gcc/gcc-4.8.2/gcc-4.8.2.tar.bz2.sig
+  % wget https://ftp.gnu.org/gnu/gnu-keyring.gpg
+  % signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-4.8.2.tar.bz2.sig`
+  % if [ $signature_invalid ]; then echo "Invalid signature" ; exit 1 ; fi
   % tar -xvjf gcc-4.8.2.tar.bz2
   % cd gcc-4.8.2
   % ./contrib/download_prerequisites
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index ef9fd92..167280f 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -823,9 +823,11 @@ with the same name.  This is necessary because both globals belong to different
 COMDAT groups and COMDATs, at the object file level, are represented by
 sections.
 
-Note that certain IR constructs like global variables and functions may create
-COMDATs in the object file in addition to any which are specified using COMDAT
-IR.  This arises, for example, when a global variable has linkonce_odr linkage.
+Note that certain IR constructs like global variables and functions may
+create COMDATs in the object file in addition to any which are specified using
+COMDAT IR.  This arises when the code generator is configured to emit globals
+in individual sections (e.g. when `-data-sections` or `-function-sections`
+is supplied to `llc`).
 
 .. _namedmetadatastructure:
 
@@ -3640,7 +3642,7 @@ will be partially unrolled.
 '``llvm.loop.unroll.disable``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-This metadata either disables loop unrolling. The metadata has a single operand
+This metadata disables loop unrolling. The metadata has a single operand
 which is the string ``llvm.loop.unroll.disable``.  For example:
 
 .. code-block:: llvm
@@ -3650,7 +3652,7 @@ which is the string ``llvm.loop.unroll.disable``.  For example:
 '``llvm.loop.unroll.runtime.disable``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-This metadata either disables runtime loop unrolling. The metadata has a single
+This metadata disables runtime loop unrolling. The metadata has a single
 operand which is the string ``llvm.loop.unroll.runtime.disable``.  For example:
 
 .. code-block:: llvm
@@ -3660,8 +3662,8 @@ operand which is the string ``llvm.loop.unroll.runtime.disable``.  For example:
 '``llvm.loop.unroll.full``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-This metadata either suggests that the loop should be unrolled fully. The
-metadata has a single operand which is the string ``llvm.loop.unroll.disable``.
+This metadata suggests that the loop should be unrolled fully. The
+metadata has a single operand which is the string ``llvm.loop.unroll.full``.
 For example:
 
 .. code-block:: llvm
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index f7238af..1dcd6a0 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -60,26 +60,28 @@ to upload your patch):
 To upload a new patch:
 
 * Click *Differential*.
-* Click *Create Diff*.
-* Paste the text diff or upload the patch file.
-  Note that TODO
+* Click *+ Create Diff*.
+* Paste the text diff or browse to the patch file. Click *Create Diff*.
+* Leave the Repository field blank.
 * Leave the drop down on *Create a new Revision...* and click *Continue*.
 * Enter a descriptive title and summary.  The title and summary are usually
   in the form of a :ref:`commit message <commit messages>`.
 * Add reviewers and mailing
   lists that you want to be included in the review. If your patch is
-  for LLVM, add llvm-commits as a subscriber; if your patch is for Clang,
+  for LLVM, add llvm-commits as a Subscriber; if your patch is for Clang,
   add cfe-commits.
+* Leave the Repository and Project fields blank.
 * Click *Save*.
 
 To submit an updated patch:
 
 * Click *Differential*.
-* Click *Create Diff*.
-* Paste the updated diff.
+* Click *+ Create Diff*.
+* Paste the updated diff or browse to the updated patch file. Click *Create Diff*.
 * Select the review you want to from the *Attach To* dropdown and click
   *Continue*.
-* Click *Save*.
+* Leave the Repository and Project fields blank.
+* Add comments about the changes in the new diff. Click *Save*.
 
 Reviewing code with Phabricator
 -------------------------------
diff --git a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
index 93de333..c9b2c6a 100644
--- a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
+++ b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
@@ -1168,7 +1168,6 @@ public:
 
   KaleidoscopeJIT(SessionContext &Session)
     : Session(Session),
-      Mang(Session.getTarget().getDataLayout()),
       CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())),
       LazyEmitLayer(CompileLayer),
       CompileCallbacks(LazyEmitLayer, CCMgrMemMgr, Session.getLLVMContext(),
@@ -1179,7 +1178,8 @@ public:
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name);
+      Mangler::getNameWithPrefix(MangledNameStream, Name,
+                                 *Session.getTarget().getDataLayout());
     }
     return MangledName;
   }
@@ -1306,7 +1306,6 @@ private:
   }
 
   SessionContext &Session;
-  Mangler Mang;
   SectionMemoryManager CCMgrMemMgr;
   ObjLayerT ObjectLayer;
   CompileLayerT CompileLayer;
diff --git a/examples/Kaleidoscope/Orc/initial/toy.cpp b/examples/Kaleidoscope/Orc/initial/toy.cpp
index bf43f29..7e99c0f 100644
--- a/examples/Kaleidoscope/Orc/initial/toy.cpp
+++ b/examples/Kaleidoscope/Orc/initial/toy.cpp
@@ -1160,14 +1160,14 @@ public:
   typedef CompileLayerT::ModuleSetHandleT ModuleHandleT;
 
   KaleidoscopeJIT(SessionContext &Session)
-    : Mang(Session.getTarget().getDataLayout()),
-      CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())) {}
+      : DL(*Session.getTarget().getDataLayout()),
+        CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())) {}
 
   std::string mangle(const std::string &Name) {
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name);
+      Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
     }
     return MangledName;
   }
@@ -1201,8 +1201,7 @@ public:
   }
 
 private:
-
-  Mangler Mang;
+  const DataLayout &DL;
   ObjLayerT ObjectLayer;
   CompileLayerT CompileLayer;
 };
diff --git a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp
index 1369ba6..4b4c191 100644
--- a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp
+++ b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp
@@ -1162,15 +1162,15 @@ public:
   typedef LazyEmitLayerT::ModuleSetHandleT ModuleHandleT;
 
   KaleidoscopeJIT(SessionContext &Session)
-    : Mang(Session.getTarget().getDataLayout()),
-      CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())),
-      LazyEmitLayer(CompileLayer) {}
+      : DL(*Session.getTarget().getDataLayout()),
+        CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())),
+        LazyEmitLayer(CompileLayer) {}
 
   std::string mangle(const std::string &Name) {
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name);
+      Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
     }
     return MangledName;
   }
@@ -1204,8 +1204,7 @@ public:
   }
 
 private:
-
-  Mangler Mang;
+  const DataLayout &DL;
   ObjLayerT ObjectLayer;
   CompileLayerT CompileLayer;
   LazyEmitLayerT LazyEmitLayer;
diff --git a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp
index c489a45..ca34de7 100644
--- a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp
+++ b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp
@@ -1162,7 +1162,6 @@ public:
 
   KaleidoscopeJIT(SessionContext &Session)
     : Session(Session),
-      Mang(Session.getTarget().getDataLayout()),
       CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())),
       LazyEmitLayer(CompileLayer) {}
 
@@ -1170,7 +1169,8 @@ public:
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name);
+      Mangler::getNameWithPrefix(MangledNameStream, Name,
+                                 *Session.getTarget().getDataLayout());
     }
     return MangledName;
   }
@@ -1236,7 +1236,6 @@ private:
   }
 
   SessionContext &Session;
-  Mangler Mang;
   ObjLayerT ObjectLayer;
   CompileLayerT CompileLayer;
   LazyEmitLayerT LazyEmitLayer;
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 9f37dd7..42c05a2 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -40,7 +40,7 @@ typedef bool lto_bool_t;
  * @{
  */
 
-#define LTO_API_VERSION 15
+#define LTO_API_VERSION 16
 
 /**
  * \since prior to LTO_API_VERSION=3
@@ -280,39 +280,15 @@ lto_module_get_symbol_attribute(lto_module_t mod, unsigned int index);
 
 
 /**
- * Returns the number of dependent libraries in the object module.
+ * Returns the module's linker options.
  *
- * \since LTO_API_VERSION=8
- */
-extern unsigned int
-lto_module_get_num_deplibs(lto_module_t mod);
-
-
-/**
- * Returns the ith dependent library in the module.
- *
- * \since LTO_API_VERSION=8
- */
-extern const char*
-lto_module_get_deplib(lto_module_t mod, unsigned int index);
-
-
-/**
- * Returns the number of linker options in the object module.
- *
- * \since LTO_API_VERSION=8
- */
-extern unsigned int
-lto_module_get_num_linkeropts(lto_module_t mod);
-
-
-/**
- * Returns the ith linker option in the module.
+ * The linker options may consist of multiple flags. It is the linker's
+ * responsibility to split the flags using a platform-specific mechanism.
  *
- * \since LTO_API_VERSION=8
+ * \since LTO_API_VERSION=16
  */
 extern const char*
-lto_module_get_linkeropt(lto_module_t mod, unsigned int index);
+lto_module_get_linkeropts(lto_module_t mod);
 
 
 /**
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index a790203..5013f29 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -1038,7 +1038,9 @@ public:
   /// the validity of the less-than relationship.
   ///
   /// \returns true if *this < RHS when considered unsigned.
-  bool ult(uint64_t RHS) const { return ult(APInt(getBitWidth(), RHS)); }
+  bool ult(uint64_t RHS) const {
+    return getActiveBits() > 64 ? false : getZExtValue() < RHS;
+  }
 
   /// \brief Signed less than comparison
   ///
@@ -1054,7 +1056,9 @@ public:
   /// the validity of the less-than relationship.
   ///
   /// \returns true if *this < RHS when considered signed.
-  bool slt(uint64_t RHS) const { return slt(APInt(getBitWidth(), RHS)); }
+  bool slt(int64_t RHS) const {
+    return getMinSignedBits() > 64 ? isNegative() : getSExtValue() < RHS;
+  }
 
   /// \brief Unsigned less or equal comparison
   ///
@@ -1070,7 +1074,7 @@ public:
   /// the validity of the less-or-equal relationship.
   ///
   /// \returns true if *this <= RHS when considered unsigned.
-  bool ule(uint64_t RHS) const { return ule(APInt(getBitWidth(), RHS)); }
+  bool ule(uint64_t RHS) const { return !ugt(RHS); }
 
   /// \brief Signed less or equal comparison
   ///
@@ -1086,7 +1090,7 @@ public:
   /// validity of the less-or-equal relationship.
   ///
   /// \returns true if *this <= RHS when considered signed.
-  bool sle(uint64_t RHS) const { return sle(APInt(getBitWidth(), RHS)); }
+  bool sle(uint64_t RHS) const { return !sgt(RHS); }
 
   /// \brief Unsigned greather than comparison
   ///
@@ -1102,7 +1106,9 @@ public:
   /// the validity of the greater-than relationship.
   ///
   /// \returns true if *this > RHS when considered unsigned.
-  bool ugt(uint64_t RHS) const { return ugt(APInt(getBitWidth(), RHS)); }
+  bool ugt(uint64_t RHS) const {
+    return getActiveBits() > 64 ? true : getZExtValue() > RHS;
+  }
 
   /// \brief Signed greather than comparison
   ///
@@ -1118,7 +1124,9 @@ public:
   /// the validity of the greater-than relationship.
   ///
   /// \returns true if *this > RHS when considered signed.
-  bool sgt(uint64_t RHS) const { return sgt(APInt(getBitWidth(), RHS)); }
+  bool sgt(int64_t RHS) const {
+    return getMinSignedBits() > 64 ? !isNegative() : getSExtValue() > RHS;
+  }
 
   /// \brief Unsigned greater or equal comparison
   ///
@@ -1134,7 +1142,7 @@ public:
   /// the validity of the greater-or-equal relationship.
   ///
   /// \returns true if *this >= RHS when considered unsigned.
-  bool uge(uint64_t RHS) const { return uge(APInt(getBitWidth(), RHS)); }
+  bool uge(uint64_t RHS) const { return !ult(RHS); }
 
   /// \brief Signed greather or equal comparison
   ///
@@ -1150,7 +1158,7 @@ public:
   /// the validity of the greater-or-equal relationship.
   ///
   /// \returns true if *this >= RHS when considered signed.
-  bool sge(uint64_t RHS) const { return sge(APInt(getBitWidth(), RHS)); }
+  bool sge(int64_t RHS) const { return !slt(RHS); }
 
   /// This operation tests if there are any pairs of corresponding bits
   /// between this APInt and RHS that are both set.
@@ -1896,11 +1904,11 @@ inline APInt Xor(const APInt &LHS, const APInt &RHS) { return LHS ^ RHS; }
 /// Performs a bitwise complement operation on APInt.
 inline APInt Not(const APInt &APIVal) { return ~APIVal; }
 
-} // namespace APIntOps
+} // End of APIntOps namespace
 
 // See friend declaration above. This additional declaration is required in
 // order to compile LLVM with IBM xlC compiler.
 hash_code hash_value(const APInt &Arg);
-} // namespace llvm
+} // End of llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h
index 91ccda2..a187515 100644
--- a/include/llvm/ADT/APSInt.h
+++ b/include/llvm/ADT/APSInt.h
@@ -33,6 +33,15 @@ public:
   explicit APSInt(APInt I, bool isUnsigned = true)
    : APInt(std::move(I)), IsUnsigned(isUnsigned) {}
 
+  /// Construct an APSInt from a string representation.
+  ///
+  /// This constructor interprets the string \p Str using the radix of 10.
+  /// The interpretation stops at the end of the string. The bit width of the
+  /// constructed APSInt is determined automatically.
+  ///
+  /// \param Str the string to be interpreted.
+  explicit APSInt(StringRef Str);
+
   APSInt &operator=(APInt RHS) {
     // Retain our current sign.
     APInt::operator=(std::move(RHS));
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 397e2ee..c8795fd 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -286,6 +286,11 @@ namespace llvm {
       return MutableArrayRef<T>(data()+N, M);
     }
 
+    MutableArrayRef<T> drop_back(unsigned N) const {
+      assert(this->size() >= N && "Dropping more elements than exist");
+      return slice(0, this->size() - N);
+    }
+
     /// @}
     /// @name Operator Overloads
     /// @{
@@ -361,6 +366,6 @@ namespace llvm {
   template <typename T> struct isPodLike<ArrayRef<T> > {
     static const bool value = true;
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index e57171d..f58dd73 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -568,7 +568,7 @@ private:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 namespace std {
   /// Implement std::swap in terms of BitVector swap.
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index bf58bec..27f7315 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -42,7 +42,7 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
   ValueT &getSecond() { return std::pair<KeyT, ValueT>::second; }
   const ValueT &getSecond() const { return std::pair<KeyT, ValueT>::second; }
 };
-} // namespace detail
+}
 
 template <
     typename KeyT, typename ValueT, typename KeyInfoT = DenseMapInfo<KeyT>,
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 6f17a64..b0a0530 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -14,6 +14,8 @@
 #ifndef LLVM_ADT_DENSEMAPINFO_H
 #define LLVM_ADT_DENSEMAPINFO_H
 
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include "llvm/Support/type_traits.h"
 
@@ -163,6 +165,31 @@ struct DenseMapInfo<std::pair<T, U> > {
   }
 };
 
+// Provide DenseMapInfo for StringRefs.
+template <> struct DenseMapInfo<StringRef> {
+  static inline StringRef getEmptyKey() {
+    return StringRef(reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)),
+                     0);
+  }
+  static inline StringRef getTombstoneKey() {
+    return StringRef(reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)),
+                     0);
+  }
+  static unsigned getHashValue(StringRef Val) {
+    assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!");
+    assert(Val.data() != getTombstoneKey().data() &&
+           "Cannot hash the tombstone key!");
+    return (unsigned)(hash_value(Val));
+  }
+  static bool isEqual(StringRef LHS, StringRef RHS) {
+    if (RHS.data() == getEmptyKey().data())
+      return LHS.data() == getEmptyKey().data();
+    if (RHS.data() == getTombstoneKey().data())
+      return LHS.data() == getTombstoneKey().data();
+    return LHS == RHS;
+  }
+};
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index b1631be..d340240 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -32,7 +32,7 @@ public:
   DenseSetEmpty &getSecond() { return *this; }
   const DenseSetEmpty &getSecond() const { return *this; }
 };
-} // namespace detail
+}
 
 /// DenseSet - This implements a dense probed hash-table based set.
 template<typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT> >
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 01bbe1a..d79b9ac 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -288,6 +288,6 @@ iterator_range<idf_ext_iterator<T, SetTy>> inverse_depth_first_ext(const T& G,
   return make_range(idf_ext_begin(G, S), idf_ext_end(G, S));
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index 6e87dbd..d6a26f8 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -278,6 +278,6 @@ public:
   };
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h
index 21bf23b..823caef 100644
--- a/include/llvm/ADT/GraphTraits.h
+++ b/include/llvm/ADT/GraphTraits.h
@@ -101,6 +101,6 @@ struct GraphTraits<Inverse<Inverse<T> > > {
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/IndexedMap.h b/include/llvm/ADT/IndexedMap.h
index ae9c695..5ba85c0 100644
--- a/include/llvm/ADT/IndexedMap.h
+++ b/include/llvm/ADT/IndexedMap.h
@@ -80,6 +80,6 @@ template <typename T, typename ToIndexT = llvm::identity<unsigned> >
     }
   };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/IntEqClasses.h b/include/llvm/ADT/IntEqClasses.h
index 9dbc228..8e75c48 100644
--- a/include/llvm/ADT/IntEqClasses.h
+++ b/include/llvm/ADT/IntEqClasses.h
@@ -83,6 +83,6 @@ public:
   void uncompress();
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index dd48497..855ab89 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -204,6 +204,6 @@ void operator>=(const Optional<T> &X, const Optional<U> &Y);
 template<typename T, typename U>
 void operator>(const Optional<T> &X, const Optional<U> &Y);
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 3c63a52..f27b811 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -507,6 +507,6 @@ namespace llvm {
                                   RHS.template get<U>()));
     }
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index 059d7b0..759a2db2 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -295,6 +295,6 @@ public:
   rpo_iterator end() { return Blocks.rend(); }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/PriorityQueue.h b/include/llvm/ADT/PriorityQueue.h
index 869ef81..827d0b3 100644
--- a/include/llvm/ADT/PriorityQueue.h
+++ b/include/llvm/ADT/PriorityQueue.h
@@ -79,6 +79,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index dc78274..bc74416 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -240,6 +240,6 @@ template <class T> scc_iterator<Inverse<T> > scc_end(const Inverse<T> &G) {
   return scc_iterator<Inverse<T> >::end(G);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index 14204c1..b68345a 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -417,6 +417,6 @@ template <typename T> struct deref {
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/SetOperations.h b/include/llvm/ADT/SetOperations.h
index b5f4177..71f5db3 100644
--- a/include/llvm/ADT/SetOperations.h
+++ b/include/llvm/ADT/SetOperations.h
@@ -66,6 +66,6 @@ void set_subtract(S1Ty &S1, const S2Ty &S2) {
     S1.erase(*SI);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h
index f15f4f7..a7fd408 100644
--- a/include/llvm/ADT/SetVector.h
+++ b/include/llvm/ADT/SetVector.h
@@ -225,7 +225,7 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 // vim: sw=2 ai
 #endif
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index a74b7bf..ae3d645 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -588,7 +588,7 @@ operator^(const SmallBitVector &LHS, const SmallBitVector &RHS) {
   return Result;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 namespace std {
   /// Implement std::swap in terms of BitVector swap.
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 0d1635a..3e3c9c1 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -334,7 +334,7 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 namespace std {
   /// Implement std::swap in terms of SmallPtrSet swap.
diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h
index 92cd689..e569f54 100644
--- a/include/llvm/ADT/SmallString.h
+++ b/include/llvm/ADT/SmallString.h
@@ -292,6 +292,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index b334ac0..5b208b7 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -924,7 +924,7 @@ static inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
   return X.capacity_in_bytes();
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 namespace std {
   /// Implement std::swap in terms of SmallVector swap.
@@ -940,6 +940,6 @@ namespace std {
   swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
     LHS.swap(RHS);
   }
-} // namespace std
+}
 
 #endif
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index 264c6b5..d98abc3 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -176,6 +176,6 @@ void PrintStatistics();
 /// \brief Print statistics to the given output stream.
 void PrintStatistics(raw_ostream &OS);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 5e8c072..0992f5d 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -207,6 +207,6 @@ inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
   return join_impl(Begin, End, Separator, tag());
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index c8ece8f..8721c73 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -447,6 +447,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 163ec63..95660a4 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -566,6 +566,6 @@ namespace llvm {
   // StringRefs can be treated like a POD type.
   template <typename T> struct isPodLike;
   template <> struct isPodLike<StringRef> { static const bool value = true; };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index 7c52476..3e0cc20 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -29,6 +29,6 @@ namespace llvm {
       return base::insert(std::make_pair(Key, '\0'));
     }
   };
-} // namespace llvm
+}
 
 #endif // LLVM_ADT_STRINGSET_H
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index cb6edc8..06f5870 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -85,7 +85,9 @@ public:
     spir64,     // SPIR: standard portable IR for OpenCL 64-bit version
     kalimba,    // Kalimba: generic kalimba
     shave,      // SHAVE: Movidius vector VLIW processors
-    LastArchType = shave
+    wasm32,     // WebAssembly with 32-bit pointers
+    wasm64,     // WebAssembly with 64-bit pointers
+    LastArchType = wasm64
   };
   enum SubArchType {
     NoSubArch,
@@ -609,7 +611,7 @@ public:
   /// @}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 
 #endif
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index db4a5be..db0bf4b 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -537,6 +537,6 @@ namespace llvm {
   }
 
   /// @}
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ADT/edit_distance.h b/include/llvm/ADT/edit_distance.h
index 5fc4bee..c2b2041 100644
--- a/include/llvm/ADT/edit_distance.h
+++ b/include/llvm/ADT/edit_distance.h
@@ -97,6 +97,6 @@ unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
   return Result;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index 4f10167..a7b9306 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -655,7 +655,7 @@ struct ilist : public iplist<NodeTy> {
   void resize(size_type newsize) { resize(newsize, NodeTy()); }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 namespace std {
   // Ensure that swap uses the fast list swap...
diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h
index 14ca26b..26d0b55 100644
--- a/include/llvm/ADT/ilist_node.h
+++ b/include/llvm/ADT/ilist_node.h
@@ -101,6 +101,6 @@ public:
   /// @}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/iterator.h b/include/llvm/ADT/iterator.h
index 28728ca..c307928 100644
--- a/include/llvm/ADT/iterator.h
+++ b/include/llvm/ADT/iterator.h
@@ -162,6 +162,8 @@ protected:
           int>::type = 0)
       : I(std::forward<U &&>(u)) {}
 
+  const WrappedIteratorT &wrapped() const { return I; }
+
 public:
   typedef DifferenceTypeT difference_type;
 
@@ -239,6 +241,6 @@ struct pointee_iterator
   T &operator*() const { return **this->I; }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h
index 009b716..523a86f 100644
--- a/include/llvm/ADT/iterator_range.h
+++ b/include/llvm/ADT/iterator_range.h
@@ -51,6 +51,6 @@ template <class T> iterator_range<T> make_range(T x, T y) {
 template <typename T> iterator_range<T> make_range(std::pair<T, T> p) {
   return iterator_range<T>(std::move(p.first), std::move(p.second));
 }
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 7f037fb..f4c1167 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -56,6 +56,34 @@ class MemTransferInst;
 class MemIntrinsic;
 class DominatorTree;
 
+/// The possible results of an alias query.
+///
+/// These results are always computed between two MemoryLocation objects as
+/// a query to some alias analysis.
+///
+/// Note that these are unscoped enumerations because we would like to support
+/// implicitly testing a result for the existence of any possible aliasing with
+/// a conversion to bool, but an "enum class" doesn't support this. The
+/// canonical names from the literature are suffixed and unique anyways, and so
+/// they serve as global constants in LLVM for these results.
+///
+/// See docs/AliasAnalysis.html for more information on the specific meanings
+/// of these values.
+enum AliasResult {
+  /// The two locations do not alias at all.
+  ///
+  /// This value is arranged to convert to false, while all other values
+  /// convert to true. This allows a boolean context to convert the result to
+  /// a binary flag indicating whether there is the possibility of aliasing.
+  NoAlias = 0,
+  /// The two locations may or may not alias. This is the least precise result.
+  MayAlias,
+  /// The two locations alias, but only due to a partial overlap.
+  PartialAlias,
+  /// The two locations precisely alias each other.
+  MustAlias,
+};
+
 class AliasAnalysis {
 protected:
   const DataLayout *DL;
@@ -95,22 +123,6 @@ public:
   /// Alias Queries...
   ///
 
-  /// Alias analysis result - Either we know for sure that it does not alias, we
-  /// know for sure it must alias, or we don't know anything: The two pointers
-  /// _might_ alias.  This enum is designed so you can do things like:
-  ///     if (AA.alias(P1, P2)) { ... }
-  /// to check to see if two pointers might alias.
-  ///
-  /// See docs/AliasAnalysis.html for more information on the specific meanings
-  /// of these values.
-  ///
-  enum AliasResult {
-    NoAlias = 0,        ///< No dependencies.
-    MayAlias,           ///< Anything goes.
-    PartialAlias,       ///< Pointers differ, but pointees overlap.
-    MustAlias           ///< Pointers are equal.
-  };
-
   /// alias - The main low level interface to the alias analysis implementation.
   /// Returns an AliasResult indicating whether the two pointers are aliased to
   /// each other.  This is the interface that must be implemented by specific
@@ -558,6 +570,6 @@ bool isIdentifiedObject(const Value *V);
 /// IdentifiedObjects.
 bool isIdentifiedFunctionLocal(const Value *V);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index ba2eae9..881699d 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -117,24 +117,30 @@ class AliasSet : public ilist_node<AliasSet> {
   // AliasSets forwarding to it.
   unsigned RefCount : 28;
 
-  /// AccessType - Keep track of whether this alias set merely refers to the
-  /// locations of memory, whether it modifies the memory, or whether it does
-  /// both.  The lattice goes from "NoModRef" to either Refs or Mods, then to
-  /// ModRef as necessary.
+  /// The kinds of access this alias set models.
   ///
-  enum AccessType {
-    NoModRef = 0, Refs = 1,         // Ref = bit 1
-    Mods     = 2, ModRef = 3        // Mod = bit 2
+  /// We keep track of whether this alias set merely refers to the locations of
+  /// memory (and not any particular access), whether it modifies or references
+  /// the memory, or whether it does both. The lattice goes from "NoAccess" to
+  /// either RefAccess or ModAccess, then to ModRefAccess as necessary.
+  enum AccessLattice {
+    NoAccess = 0,
+    RefAccess = 1,
+    ModAccess = 2,
+    ModRefAccess = RefAccess | ModAccess
   };
-  unsigned AccessTy : 2;
+  unsigned Access : 2;
 
-  /// AliasType - Keep track the relationships between the pointers in the set.
-  /// Lattice goes from MustAlias to MayAlias.
+  /// The kind of alias relationship between pointers of the set.
   ///
-  enum AliasType {
-    MustAlias = 0, MayAlias = 1
+  /// These represent conservatively correct alias results between any members
+  /// of the set. We represent these independently of the values of alias
+  /// results in order to pack it into a single bit. Lattice goes from
+  /// MustAlias to MayAlias.
+  enum AliasLattice {
+    SetMustAlias = 0, SetMayAlias = 1
   };
-  unsigned AliasTy : 1;
+  unsigned Alias : 1;
 
   // Volatile - True if this alias set contains volatile loads or stores.
   bool Volatile : 1;
@@ -153,10 +159,10 @@ class AliasSet : public ilist_node<AliasSet> {
   
 public:
   /// Accessors...
-  bool isRef() const { return AccessTy & Refs; }
-  bool isMod() const { return AccessTy & Mods; }
-  bool isMustAlias() const { return AliasTy == MustAlias; }
-  bool isMayAlias()  const { return AliasTy == MayAlias; }
+  bool isRef() const { return Access & RefAccess; }
+  bool isMod() const { return Access & ModAccess; }
+  bool isMustAlias() const { return Alias == SetMustAlias; }
+  bool isMayAlias()  const { return Alias == SetMayAlias; }
 
   // isVolatile - Return true if this alias set contains volatile loads or
   // stores.
@@ -218,7 +224,7 @@ private:
   friend struct ilist_sentinel_traits<AliasSet>;
   AliasSet()
     : PtrList(nullptr), PtrListEnd(&PtrList), Forward(nullptr), RefCount(0),
-      AccessTy(NoModRef), AliasTy(MustAlias), Volatile(false) {
+      Access(NoAccess), Alias(SetMustAlias), Volatile(false) {
   }
 
   AliasSet(const AliasSet &AS) = delete;
@@ -419,11 +425,11 @@ private:
   }
 
   AliasSet &addPointer(Value *P, uint64_t Size, const AAMDNodes &AAInfo,
-                       AliasSet::AccessType E,
+                       AliasSet::AccessLattice E,
                        bool &NewSet) {
     NewSet = false;
     AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo, &NewSet);
-    AS.AccessTy |= E;
+    AS.Access |= E;
     return AS;
   }
   AliasSet *findAliasSetForPointer(const Value *Ptr, uint64_t Size,
@@ -437,6 +443,6 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) {
   return OS;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index 382c080..f27c32d 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -63,6 +63,6 @@ public:
 
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index bf24f66..32d9609 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -628,7 +628,7 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
   else
     addBlockEdges(*this, Irr, OuterLoop);
 }
-} // namespace bfi_detail
+}
 
 /// \brief Shared implementation for block frequency analysis.
 ///
@@ -1133,7 +1133,7 @@ template <class BT> struct BlockEdgesAdder {
       G.addEdge(Irr, BFI.getNode(*I), OuterLoop);
   }
 };
-} // namespace bfi_detail
+}
 template <class BT>
 void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
     LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index f2ca3e0..9d86756 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -158,6 +158,6 @@ private:
   bool calcInvokeHeuristics(BasicBlock *BB);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h
index f837cb4..7c4df78 100644
--- a/include/llvm/Analysis/CFG.h
+++ b/include/llvm/Analysis/CFG.h
@@ -78,6 +78,17 @@ bool isPotentiallyReachable(const BasicBlock *From, const BasicBlock *To,
                             const DominatorTree *DT = nullptr,
                             const LoopInfo *LI = nullptr);
 
-} // namespace llvm
+/// \brief Determine whether there is at least one path from a block in
+/// 'Worklist' to 'StopBB', returning true if uncertain.
+///
+/// Determine whether there is a path from at least one block in Worklist to
+/// StopBB within a single function. Returns false only if we can prove that
+/// once any block in 'Worklist' has been reached then 'StopBB' can not be
+/// executed. Conservatively returns true.
+bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
+                                    BasicBlock *StopBB,
+                                    const DominatorTree *DT = nullptr,
+                                    const LoopInfo *LI = nullptr);
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 0cc4e5d..0357648 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -119,7 +119,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
     return "";
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 namespace llvm {
   class FunctionPass;
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 42f0e65..6a406cd 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -485,6 +485,6 @@ CGSCCToFunctionPassAdaptor<FunctionPassT>
 createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) {
   return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
 }
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index ed52e86..662ae0e 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -481,6 +481,6 @@ struct GraphTraits<const CallGraph *> : public GraphTraits<
   static const CallGraphNode &CGdereference(PairTy P) { return *P.second; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h
index 94fa5bd..667e171 100644
--- a/include/llvm/Analysis/CallGraphSCCPass.h
+++ b/include/llvm/Analysis/CallGraphSCCPass.h
@@ -102,6 +102,6 @@ public:
   iterator end() const { return Nodes.end(); }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 6ab83ae..2f59691 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -102,6 +102,6 @@ struct CodeMetrics {
                                      SmallPtrSetImpl<const Value *> &EphValues);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index a0d5eab..541a210 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -97,6 +97,6 @@ bool canConstantFoldCallTo(const Function *F);
 /// with the specified arguments, returning null if unsuccessful.
 Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
                            const TargetLibraryInfo *TLI = nullptr);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/DomPrinter.h b/include/llvm/Analysis/DomPrinter.h
index 1402d77..0ed2899 100644
--- a/include/llvm/Analysis/DomPrinter.h
+++ b/include/llvm/Analysis/DomPrinter.h
@@ -25,6 +25,6 @@ namespace llvm {
   FunctionPass *createPostDomOnlyPrinterPass();
   FunctionPass *createPostDomViewerPass();
   FunctionPass *createPostDomOnlyViewerPass();
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h
index 0cdd73e..996700e 100644
--- a/include/llvm/Analysis/DominanceFrontier.h
+++ b/include/llvm/Analysis/DominanceFrontier.h
@@ -205,6 +205,6 @@ public:
 EXTERN_TEMPLATE_INSTANTIATION(class DominanceFrontierBase<BasicBlock>);
 EXTERN_TEMPLATE_INSTANTIATION(class ForwardDominanceFrontierBase<BasicBlock>);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/DominanceFrontierImpl.h b/include/llvm/Analysis/DominanceFrontierImpl.h
index 4904f93..629ae38 100644
--- a/include/llvm/Analysis/DominanceFrontierImpl.h
+++ b/include/llvm/Analysis/DominanceFrontierImpl.h
@@ -221,6 +221,6 @@ ForwardDominanceFrontierBase<BlockT>::calculate(const DomTreeT &DT,
   return *Result;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 2ad0ae7..ae9c1f5 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -178,6 +178,6 @@ protected:
 
 Pass *createIVUsersPass();
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 57da132..79ed74d 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -36,7 +36,7 @@ namespace InlineConstants {
   /// Do not inline functions which allocate this many bytes on the stack
   /// when the caller is recursive.
   const unsigned TotalAllocaSizeRecursiveCaller = 1024;
-} // namespace InlineConstants
+}
 
 /// \brief Represents the cost of inlining a function.
 ///
@@ -138,6 +138,6 @@ public:
   bool isInlineViable(Function &Callee);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h
index cbdb0c0..01eba3f 100644
--- a/include/llvm/Analysis/Interval.h
+++ b/include/llvm/Analysis/Interval.h
@@ -145,6 +145,6 @@ template <> struct GraphTraits<Inverse<Interval*> > {
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h
index 5ec50d4..655ce2d 100644
--- a/include/llvm/Analysis/IntervalIterator.h
+++ b/include/llvm/Analysis/IntervalIterator.h
@@ -263,6 +263,6 @@ inline interval_part_interval_iterator intervals_end(IntervalPartition &IP) {
   return interval_part_interval_iterator();
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h
index 2176d0c..274be2b 100644
--- a/include/llvm/Analysis/IntervalPartition.h
+++ b/include/llvm/Analysis/IntervalPartition.h
@@ -106,6 +106,6 @@ private:
   void updatePredecessors(Interval *Int);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/IteratedDominanceFrontier.h b/include/llvm/Analysis/IteratedDominanceFrontier.h
index eea0d81..5a339f1 100644
--- a/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -92,5 +92,5 @@ private:
   const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
   SmallVector<BasicBlock *, 32> PHIBlocks;
 };
-} // namespace llvm
+}
 #endif
diff --git a/include/llvm/Analysis/JumpInstrTableInfo.h b/include/llvm/Analysis/JumpInstrTableInfo.h
index ea331a4..b6dad47 100644
--- a/include/llvm/Analysis/JumpInstrTableInfo.h
+++ b/include/llvm/Analysis/JumpInstrTableInfo.h
@@ -66,6 +66,6 @@ private:
 /// bound specifies the maximum number of bytes needed to represent an
 /// unconditional jump or a trap instruction in the back end currently in use.
 ModulePass *createJumpInstrTableInfoPass(unsigned Bound);
-} // namespace llvm
+}
 
 #endif /* LLVM_ANALYSIS_JUMPINSTRTABLEINFO_H */
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index af4861f..b0b9068 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -569,6 +569,6 @@ public:
   static StringRef name() { return "LazyCallGraphPrinterPass"; }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h
index a4b7e5d..6589ac1 100644
--- a/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ b/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -66,6 +66,6 @@ namespace llvm {
                                        ImmutableCallSite CS,
                                        const MemoryLocation &Loc);
   };
-} // namespace llvm
+}  // End of llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h
index 79cd82f..7c88b13 100644
--- a/include/llvm/Analysis/Lint.h
+++ b/include/llvm/Analysis/Lint.h
@@ -44,6 +44,6 @@ void lintFunction(
   const Function &F  ///< The function to be checked
 );
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index c8a6e4a..42667d2 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -52,6 +52,6 @@ Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
                                 AliasAnalysis *AA = nullptr,
                                 AAMDNodes *AATags = nullptr);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 0f3c731..7b635a8 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -555,6 +555,6 @@ private:
   DominatorTree *DT;
   LoopInfo *LI;
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 7bfebab..bbcde8d 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -763,6 +763,6 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index b8f80df..f5cc856 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -535,6 +535,6 @@ void LoopInfoBase<BlockT, LoopT>::verify() const {
 #endif
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index 57ad793..8650000 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -169,6 +169,6 @@ private:
   Loop *CurrentLoop;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 557d6fc..805a43d 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -262,6 +262,6 @@ public:
   SizeOffsetEvalType visitInstruction(Instruction &I);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 9c50ae0..5118980 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -445,6 +445,6 @@ namespace llvm {
 
   };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/MemoryLocation.h b/include/llvm/Analysis/MemoryLocation.h
index ea69633..426b49a 100644
--- a/include/llvm/Analysis/MemoryLocation.h
+++ b/include/llvm/Analysis/MemoryLocation.h
@@ -137,6 +137,6 @@ template <> struct DenseMapInfo<MemoryLocation> {
     return LHS == RHS;
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index ffaf871..d112ab1 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -173,6 +173,6 @@ namespace llvm {
   //
   FunctionPass *createMemDerefPrinter();
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index f654652a..0f7e2b8 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -112,6 +112,6 @@ template <> struct GraphTraits<PostDominatorTree*>
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h
index 8b5b90a..6e61fc3 100644
--- a/include/llvm/Analysis/PtrUseVisitor.h
+++ b/include/llvm/Analysis/PtrUseVisitor.h
@@ -280,6 +280,6 @@ protected:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 22fd1df..7ceb086 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -906,5 +906,5 @@ EXTERN_TEMPLATE_INSTANTIATION(class RegionBase<RegionTraits<Function>>);
 EXTERN_TEMPLATE_INSTANTIATION(class RegionNodeBase<RegionTraits<Function>>);
 EXTERN_TEMPLATE_INSTANTIATION(class RegionInfoBase<RegionTraits<Function>>);
 
-} // namespace llvm
+} // End llvm namespace
 #endif
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index 5866fc5..bd51c49 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -123,6 +123,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 1c81408..d47cab8 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -954,6 +954,86 @@ namespace llvm {
     void print(raw_ostream &OS, const Module* = nullptr) const override;
     void verifyAnalysis() const override;
 
+    /// Collect parametric terms occurring in step expressions.
+    void collectParametricTerms(const SCEV *Expr,
+                                SmallVectorImpl<const SCEV *> &Terms);
+
+
+
+    /// Return in Subscripts the access functions for each dimension in Sizes.
+    void computeAccessFunctions(const SCEV *Expr,
+                                SmallVectorImpl<const SCEV *> &Subscripts,
+                                SmallVectorImpl<const SCEV *> &Sizes);
+
+    /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+    /// subscripts and sizes of an array access.
+    ///
+    /// The delinearization is a 3 step process: the first two steps compute the
+    /// sizes of each subscript and the third step computes the access functions
+    /// for the delinearized array:
+    ///
+    /// 1. Find the terms in the step functions
+    /// 2. Compute the array size
+    /// 3. Compute the access function: divide the SCEV by the array size
+    ///    starting with the innermost dimensions found in step 2. The Quotient
+    ///    is the SCEV to be divided in the next step of the recursion. The
+    ///    Remainder is the subscript of the innermost dimension. Loop over all
+    ///    array dimensions computed in step 2.
+    ///
+    /// To compute a uniform array size for several memory accesses to the same
+    /// object, one can collect in step 1 all the step terms for all the memory
+    /// accesses, and compute in step 2 a unique array shape. This guarantees
+    /// that the array shape will be the same across all memory accesses.
+    ///
+    /// FIXME: We could derive the result of steps 1 and 2 from a description of
+    /// the array shape given in metadata.
+    ///
+    /// Example:
+    ///
+    /// A[][n][m]
+    ///
+    /// for i
+    ///   for j
+    ///     for k
+    ///       A[j+k][2i][5i] =
+    ///
+    /// The initial SCEV:
+    ///
+    /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
+    ///
+    /// 1. Find the different terms in the step functions:
+    /// -> [2*m, 5, n*m, n*m]
+    ///
+    /// 2. Compute the array size: sort and unique them
+    /// -> [n*m, 2*m, 5]
+    /// find the GCD of all the terms = 1
+    /// divide by the GCD and erase constant terms
+    /// -> [n*m, 2*m]
+    /// GCD = m
+    /// divide by GCD -> [n, 2]
+    /// remove constant terms
+    /// -> [n]
+    /// size of the array is A[unknown][n][m]
+    ///
+    /// 3. Compute the access function
+    /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
+    /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
+    /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
+    /// The remainder is the subscript of the innermost array dimension: [5i].
+    ///
+    /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
+    /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
+    /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
+    /// The Remainder is the subscript of the next array dimension: [2i].
+    ///
+    /// The subscript of the outermost dimension is the Quotient: [j+k].
+    ///
+    /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
+    void delinearize(const SCEV *Expr,
+                     SmallVectorImpl<const SCEV *> &Subscripts,
+                     SmallVectorImpl<const SCEV *> &Sizes,
+                     const SCEV *ElementSize);
+
   private:
     /// Compute the backedge taken count knowing the interval difference, the
     /// stride and presence of the equality in the comparison.
@@ -981,6 +1061,6 @@ namespace llvm {
     /// to locate them all and call their destructors.
     SCEVUnknown *FirstUnknown;
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 83493fa..8ec2078 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -275,6 +275,6 @@ namespace llvm {
     Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                        Type *ExpandTy, Type *IntTy, bool useSubtract);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 14feeed..da24de2 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -356,84 +356,6 @@ namespace llvm {
     static inline bool classof(const SCEV *S) {
       return S->getSCEVType() == scAddRecExpr;
     }
-
-    /// Collect parametric terms occurring in step expressions.
-    void collectParametricTerms(ScalarEvolution &SE,
-                                SmallVectorImpl<const SCEV *> &Terms) const;
-
-    /// Return in Subscripts the access functions for each dimension in Sizes.
-    void computeAccessFunctions(ScalarEvolution &SE,
-                                SmallVectorImpl<const SCEV *> &Subscripts,
-                                SmallVectorImpl<const SCEV *> &Sizes) const;
-
-    /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
-    /// subscripts and sizes of an array access.
-    ///
-    /// The delinearization is a 3 step process: the first two steps compute the
-    /// sizes of each subscript and the third step computes the access functions
-    /// for the delinearized array:
-    ///
-    /// 1. Find the terms in the step functions
-    /// 2. Compute the array size
-    /// 3. Compute the access function: divide the SCEV by the array size
-    ///    starting with the innermost dimensions found in step 2. The Quotient
-    ///    is the SCEV to be divided in the next step of the recursion. The
-    ///    Remainder is the subscript of the innermost dimension. Loop over all
-    ///    array dimensions computed in step 2.
-    ///
-    /// To compute a uniform array size for several memory accesses to the same
-    /// object, one can collect in step 1 all the step terms for all the memory
-    /// accesses, and compute in step 2 a unique array shape. This guarantees
-    /// that the array shape will be the same across all memory accesses.
-    ///
-    /// FIXME: We could derive the result of steps 1 and 2 from a description of
-    /// the array shape given in metadata.
-    ///
-    /// Example:
-    ///
-    /// A[][n][m]
-    ///
-    /// for i
-    ///   for j
-    ///     for k
-    ///       A[j+k][2i][5i] =
-    ///
-    /// The initial SCEV:
-    ///
-    /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
-    ///
-    /// 1. Find the different terms in the step functions:
-    /// -> [2*m, 5, n*m, n*m]
-    ///
-    /// 2. Compute the array size: sort and unique them
-    /// -> [n*m, 2*m, 5]
-    /// find the GCD of all the terms = 1
-    /// divide by the GCD and erase constant terms
-    /// -> [n*m, 2*m]
-    /// GCD = m
-    /// divide by GCD -> [n, 2]
-    /// remove constant terms
-    /// -> [n]
-    /// size of the array is A[unknown][n][m]
-    ///
-    /// 3. Compute the access function
-    /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
-    /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
-    /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
-    /// The remainder is the subscript of the innermost array dimension: [5i].
-    ///
-    /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
-    /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
-    /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
-    /// The Remainder is the subscript of the next array dimension: [2i].
-    ///
-    /// The subscript of the outermost dimension is the Quotient: [j+k].
-    ///
-    /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
-    void delinearize(ScalarEvolution &SE,
-                     SmallVectorImpl<const SCEV *> &Subscripts,
-                     SmallVectorImpl<const SCEV *> &Sizes,
-                     const SCEV *ElementSize) const;
   };
 
   //===--------------------------------------------------------------------===//
@@ -829,6 +751,6 @@ static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map,
   return SCEVApplyRewriter::rewrite(Scev, Map, SE);
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 4133864..7c6423a 100644
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -73,6 +73,6 @@ const SCEV *TransformForPostIncUse(TransformKind Kind,
                                    ScalarEvolution &SE,
                                    DominatorTree &DT);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/TargetFolder.h b/include/llvm/Analysis/TargetFolder.h
index 0e17a58..12bf9fe 100644
--- a/include/llvm/Analysis/TargetFolder.h
+++ b/include/llvm/Analysis/TargetFolder.h
@@ -265,6 +265,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index d863b4f..bb6e266 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -519,6 +519,11 @@ public:
   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                            Type *ExpectedType) const;
 
+  /// \returns True if the two functions have compatible attributes for inlining
+  /// purposes.
+  bool hasCompatibleFunctionAttributes(const Function *Caller,
+                                       const Function *Callee) const;
+
   /// @}
 
 private:
@@ -619,6 +624,8 @@ public:
                                   MemIntrinsicInfo &Info) = 0;
   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                    Type *ExpectedType) = 0;
+  virtual bool hasCompatibleFunctionAttributes(const Function *Caller,
+                                               const Function *Callee) const = 0;
 };
 
 template <typename T>
@@ -804,6 +811,10 @@ public:
                                            Type *ExpectedType) override {
     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
   }
+  bool hasCompatibleFunctionAttributes(const Function *Caller,
+                                       const Function *Callee) const override {
+    return Impl.hasCompatibleFunctionAttributes(Caller, Callee);
+  }
 };
 
 template <typename T>
@@ -908,6 +919,6 @@ public:
 /// clients.
 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 59b95a8..403175a 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -335,6 +335,14 @@ public:
                                            Type *ExpectedType) {
     return nullptr;
   }
+
+  bool hasCompatibleFunctionAttributes(const Function *Caller,
+                                       const Function *Callee) const {
+    return (Caller->getFnAttribute("target-cpu") ==
+            Callee->getFnAttribute("target-cpu")) &&
+           (Caller->getFnAttribute("target-features") ==
+            Callee->getFnAttribute("target-features"));
+  }
 };
 
 /// \brief CRTP base class for use as a mix-in that aids implementing
@@ -446,6 +454,6 @@ public:
         U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
new file mode 100644
index 0000000..aa538ec
--- /dev/null
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -0,0 +1,56 @@
+//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some vectorizer utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
+#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+
+namespace llvm {
+
+/// \brief Identify if the intrinsic is trivially vectorizable.
+/// This method returns true if the intrinsic's argument types are all
+/// scalars for the scalar form of the intrinsic and all vectors for
+/// the vector form of the intrinsic.
+bool isTriviallyVectorizable(Intrinsic::ID ID);
+
+/// \brief Identifies if the intrinsic has a scalar operand. It checks for
+/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
+
+/// \brief Identify if call has a unary float signature
+/// It returns input intrinsic ID if call has a single argument,
+/// argument type and call instruction type should be floating
+/// point type and call should only reads memory.
+/// else return not_intrinsic.
+Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
+                                       Intrinsic::ID ValidIntrinsicID);
+
+/// \brief Identify if call has a binary float signature
+/// It returns input intrinsic ID if call has two arguments,
+/// arguments type and call instruction type should be floating
+/// point type and call should only reads memory.
+/// else return not_intrinsic.
+Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
+                                        Intrinsic::ID ValidIntrinsicID);
+
+/// \brief Returns intrinsic ID for call.
+/// For the input call instruction it finds mapping intrinsic and returns
+/// its intrinsic ID, in case it does not found it return not_intrinsic.
+Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI);
+
+} // llvm namespace
+
+#endif
diff --git a/include/llvm/AsmParser/Parser.h b/include/llvm/AsmParser/Parser.h
index 0c37a9b..5215140 100644
--- a/include/llvm/AsmParser/Parser.h
+++ b/include/llvm/AsmParser/Parser.h
@@ -18,55 +18,67 @@
 
 namespace llvm {
 
+class LLVMContext;
 class Module;
+struct SlotMapping;
 class SMDiagnostic;
-class LLVMContext;
 
 /// This function is the main interface to the LLVM Assembly Parser. It parses
 /// an ASCII file that (presumably) contains LLVM Assembly code. It returns a
 /// Module (intermediate representation) with the corresponding features. Note
 /// that this does not verify that the generated Module is valid, so you should
 /// run the verifier after parsing the file to check that it is okay.
-/// @brief Parse LLVM Assembly from a file
-/// @param Filename The name of the file to parse
-/// @param Error Error result info.
-/// @param Context Context in which to allocate globals info.
+/// \brief Parse LLVM Assembly from a file
+/// \param Filename The name of the file to parse
+/// \param Error Error result info.
+/// \param Context Context in which to allocate globals info.
+/// \param Slots The optional slot mapping that will be initialized during
+///              parsing.
 std::unique_ptr<Module> parseAssemblyFile(StringRef Filename,
                                           SMDiagnostic &Error,
-                                          LLVMContext &Context);
+                                          LLVMContext &Context,
+                                          SlotMapping *Slots = nullptr);
 
 /// The function is a secondary interface to the LLVM Assembly Parser. It parses
 /// an ASCII string that (presumably) contains LLVM Assembly code. It returns a
 /// Module (intermediate representation) with the corresponding features. Note
 /// that this does not verify that the generated Module is valid, so you should
 /// run the verifier after parsing the file to check that it is okay.
-/// @brief Parse LLVM Assembly from a string
-/// @param AsmString The string containing assembly
-/// @param Error Error result info.
-/// @param Context Context in which to allocate globals info.
+/// \brief Parse LLVM Assembly from a string
+/// \param AsmString The string containing assembly
+/// \param Error Error result info.
+/// \param Context Context in which to allocate globals info.
+/// \param Slots The optional slot mapping that will be initialized during
+///              parsing.
 std::unique_ptr<Module> parseAssemblyString(StringRef AsmString,
                                             SMDiagnostic &Error,
-                                            LLVMContext &Context);
+                                            LLVMContext &Context,
+                                            SlotMapping *Slots = nullptr);
 
 /// parseAssemblyFile and parseAssemblyString are wrappers around this function.
-/// @brief Parse LLVM Assembly from a MemoryBuffer.
-/// @param F The MemoryBuffer containing assembly
-/// @param Err Error result info.
-/// @param Context Context in which to allocate globals info.
+/// \brief Parse LLVM Assembly from a MemoryBuffer.
+/// \param F The MemoryBuffer containing assembly
+/// \param Err Error result info.
+/// \param Slots The optional slot mapping that will be initialized during
+///              parsing.
 std::unique_ptr<Module> parseAssembly(MemoryBufferRef F, SMDiagnostic &Err,
-                                      LLVMContext &Context);
+                                      LLVMContext &Context,
+                                      SlotMapping *Slots = nullptr);
 
 /// This function is the low-level interface to the LLVM Assembly Parser.
 /// This is kept as an independent function instead of being inlined into
 /// parseAssembly for the convenience of interactive users that want to add
 /// recently parsed bits to an existing module.
 ///
-/// @param F The MemoryBuffer containing assembly
-/// @param M The module to add data to.
-/// @param Err Error result info.
-/// @return true on error.
-bool parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err);
+/// \param F The MemoryBuffer containing assembly
+/// \param M The module to add data to.
+/// \param Err Error result info.
+/// \param Slots The optional slot mapping that will be initialized during
+///              parsing.
+/// \return true on error.
+bool parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err,
+                       SlotMapping *Slots = nullptr);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/AsmParser/SlotMapping.h b/include/llvm/AsmParser/SlotMapping.h
new file mode 100644
index 0000000..c5f61d2
--- /dev/null
+++ b/include/llvm/AsmParser/SlotMapping.h
@@ -0,0 +1,34 @@
+//===-- SlotMapping.h - Slot number mapping for unnamed values --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SlotMapping struct.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASMPARSER_SLOTMAPPING_H
+#define LLVM_ASMPARSER_SLOTMAPPING_H
+
+#include "llvm/IR/TrackingMDRef.h"
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+class GlobalValue;
+
+/// This struct contains the mapping from the slot numbers to unnamed metadata
+/// nodes and global values.
+struct SlotMapping {
+  std::vector<GlobalValue *> GlobalValues;
+  std::map<unsigned, TrackingMDNodeRef> MetadataNodes;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index 6b23eb9..96c4201 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -77,7 +77,7 @@ namespace bitc {
                                       //                             [id, name]
   };
 
-} // namespace bitc
+} // End bitc namespace
 
 /// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
 /// This is actually a union of two different things:
@@ -180,6 +180,6 @@ public:
     OperandList.push_back(OpInfo);
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h
index cc742f1..ae915c6 100644
--- a/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -56,6 +56,6 @@ public:
   static StringRef name() { return "BitcodeWriterPass"; }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 9201daf..4c040a7 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -512,6 +512,6 @@ public:
   bool ReadBlockInfoBlock();
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index eef6076..9f23023 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -520,6 +520,6 @@ public:
 };
 
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 41aa148..605c417 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -167,6 +167,7 @@ namespace bitc {
     METADATA_EXPRESSION    = 29,  // [distinct, n x element]
     METADATA_OBJC_PROPERTY = 30,  // [distinct, name, file, line, ...]
     METADATA_IMPORTED_ENTITY=31,  // [distinct, tag, scope, entity, line, name]
+    METADATA_MODULE=32,           // [distinct, scope, name, ...]
   };
 
   // The constants block (CONSTANTS_BLOCK_ID) describes emission for each
@@ -416,7 +417,7 @@ namespace bitc {
     COMDAT_SELECTION_KIND_SAME_SIZE = 5,
   };
 
-} // namespace bitc
-} // namespace llvm
+} // End bitc namespace
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
index d158569..6797aa1 100644
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -166,7 +166,7 @@ namespace llvm {
     }
   };
 
-} // namespace llvm
+} // End llvm namespace
 
 namespace std {
 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index 96e9554..c4b94ed 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -115,6 +115,6 @@ bool returnTypeIsEligibleForTailCall(const Function *F,
 // or we are in LTO.
 bool canBeOmittedFromSymbolTable(const GlobalValue *GV);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 8a0989f..fe7efae 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -535,6 +535,6 @@ private:
   void EmitXXStructorList(const Constant *List, bool isCtor);
   GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C);
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index cb61cc7..3e464f4 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -830,6 +830,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index 7c90190..91fb0a9 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -74,6 +74,6 @@ namespace llvm {
                                      const MachineBlockFrequencyInfo &MBFI,
                                      VirtRegAuxInfo::NormalizingFn norm =
                                          normalizeSpillWeight);
-} // namespace llvm
+}
 
 #endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index 3c3f770..554511d 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -17,6 +17,8 @@
 #define LLVM_CODEGEN_COMMANDFLAGS_H
 
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
 #include "llvm//MC/SubtargetFeature.h"
@@ -249,7 +251,6 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
   Options.NoZerosInBSS = DontPlaceZerosInBSS;
   Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
   Options.StackAlignmentOverride = OverrideStackAlignment;
-  Options.TrapFuncName = TrapFuncName;
   Options.PositionIndependentExecutable = EnablePIE;
   Options.UseInitArray = !UseCtors;
   Options.DataSections = DataSections;
@@ -320,6 +321,16 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features,
                                        "disable-tail-calls",
                                        toStringRef(DisableTailCalls));
 
+    if (TrapFuncName.getNumOccurrences() > 0)
+      for (auto &B : F)
+        for (auto &I : B)
+          if (auto *Call = dyn_cast<CallInst>(&I))
+            if (const auto *F = Call->getCalledFunction())
+              if (F->getIntrinsicID() == Intrinsic::debugtrap ||
+                  F->getIntrinsicID() == Intrinsic::trap)
+                Call->addAttribute(llvm::AttributeSet::FunctionIndex,
+                                   "trap-func-name", TrapFuncName);
+
     // Let NewAttrs override Attrs.
     NewAttrs = Attrs.addAttributes(Ctx, AttributeSet::FunctionIndex, NewAttrs);
     F.setAttributes(NewAttrs);
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index ccff388..c44a7e0 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -91,7 +91,7 @@ public:
 // API call is made to prune the dependence.
 class VLIWPacketizerList {
 protected:
-  const MachineFunction &MF;
+  MachineFunction &MF;
   const TargetInstrInfo *TII;
 
   // The VLIW Scheduler.
@@ -159,6 +159,6 @@ public:
   }
 
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 1ea3217..f07712a 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -15,6 +15,8 @@
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
 
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/Support/Dwarf.h"
@@ -436,11 +438,11 @@ public:
 
   /// EmitValue - Emit value via the Dwarf writer.
   ///
-  void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+  void EmitValue(const AsmPrinter *AP) const;
 
   /// SizeOf - Return the size of a value in bytes.
   ///
-  unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
+  unsigned SizeOf(const AsmPrinter *AP) const;
 
 #ifndef NDEBUG
   void print(raw_ostream &O) const;
@@ -448,10 +450,179 @@ public:
 #endif
 };
 
+struct IntrusiveBackListNode {
+  PointerIntPair<IntrusiveBackListNode *, 1> Next;
+  IntrusiveBackListNode() : Next(this, true) {}
+
+  IntrusiveBackListNode *getNext() const {
+    return Next.getInt() ? nullptr : Next.getPointer();
+  }
+};
+
+struct IntrusiveBackListBase {
+  typedef IntrusiveBackListNode Node;
+  Node *Last = nullptr;
+
+  bool empty() const { return !Last; }
+  void push_back(Node &N) {
+    assert(N.Next.getPointer() == &N && "Expected unlinked node");
+    assert(N.Next.getInt() == true && "Expected unlinked node");
+
+    if (Last) {
+      N.Next = Last->Next;
+      Last->Next.setPointerAndInt(&N, false);
+    }
+    Last = &N;
+  }
+};
+
+template <class T> class IntrusiveBackList : IntrusiveBackListBase {
+public:
+  using IntrusiveBackListBase::empty;
+  void push_back(T &N) { IntrusiveBackListBase::push_back(N); }
+  T &back() { return *static_cast<T *>(Last); }
+  const T &back() const { return *static_cast<T *>(Last); }
+
+  class const_iterator;
+  class iterator
+      : public iterator_facade_base<iterator, std::forward_iterator_tag, T> {
+    friend class const_iterator;
+    Node *N = nullptr;
+
+  public:
+    iterator() = default;
+    explicit iterator(T *N) : N(N) {}
+
+    iterator &operator++() {
+      N = N->getNext();
+      return *this;
+    }
+
+    explicit operator bool() const { return N; }
+    T &operator*() const { return *static_cast<T *>(N); }
+
+    bool operator==(const iterator &X) const { return N == X.N; }
+    bool operator!=(const iterator &X) const { return N != X.N; }
+  };
+
+  class const_iterator
+      : public iterator_facade_base<const_iterator, std::forward_iterator_tag,
+                                    const T> {
+    const Node *N = nullptr;
+
+  public:
+    const_iterator() = default;
+    // Placate MSVC by explicitly scoping 'iterator'.
+    const_iterator(typename IntrusiveBackList<T>::iterator X) : N(X.N) {}
+    explicit const_iterator(const T *N) : N(N) {}
+
+    const_iterator &operator++() {
+      N = N->getNext();
+      return *this;
+    }
+
+    explicit operator bool() const { return N; }
+    const T &operator*() const { return *static_cast<const T *>(N); }
+
+    bool operator==(const const_iterator &X) const { return N == X.N; }
+    bool operator!=(const const_iterator &X) const { return N != X.N; }
+  };
+
+  iterator begin() {
+    return Last ? iterator(static_cast<T *>(Last->Next.getPointer())) : end();
+  }
+  const_iterator begin() const {
+    return const_cast<IntrusiveBackList *>(this)->begin();
+  }
+  iterator end() { return iterator(); }
+  const_iterator end() const { return const_iterator(); }
+
+  static iterator toIterator(T &N) { return iterator(&N); }
+  static const_iterator toIterator(const T &N) { return const_iterator(&N); }
+};
+
+/// A list of DIE values.
+///
+/// This is a singly-linked list, but instead of reversing the order of
+/// insertion, we keep a pointer to the back of the list so we can push in
+/// order.
+///
+/// There are two main reasons to choose a linked list over a customized
+/// vector-like data structure.
+///
+///  1. For teardown efficiency, we want DIEs to be BumpPtrAllocated.  Using a
+///     linked list here makes this way easier to accomplish.
+///  2. Carrying an extra pointer per \a DIEValue isn't expensive.  45% of DIEs
+///     have 2 or fewer values, and 90% have 5 or fewer.  A vector would be
+///     over-allocated by 50% on average anyway, the same cost as the
+///     linked-list node.
+class DIEValueList {
+  struct Node : IntrusiveBackListNode {
+    DIEValue V;
+    explicit Node(DIEValue V) : V(V) {}
+  };
+
+  typedef IntrusiveBackList<Node> ListTy;
+  ListTy List;
+
+public:
+  bool empty() const { return List.empty(); }
+
+  class const_iterator;
+  class iterator
+      : public iterator_adaptor_base<iterator, ListTy::iterator,
+                                     std::forward_iterator_tag, DIEValue> {
+    friend class const_iterator;
+    typedef iterator_adaptor_base<iterator, ListTy::iterator,
+                                  std::forward_iterator_tag,
+                                  DIEValue> iterator_adaptor;
+
+  public:
+    iterator() = default;
+    explicit iterator(ListTy::iterator X) : iterator_adaptor(X) {}
+
+    explicit operator bool() const { return bool(wrapped()); }
+    DIEValue &operator*() const { return wrapped()->V; }
+  };
+
+  class const_iterator
+      : public iterator_adaptor_base<const_iterator, ListTy::const_iterator,
+                                     std::forward_iterator_tag,
+                                     const DIEValue> {
+    typedef iterator_adaptor_base<const_iterator, ListTy::const_iterator,
+                                  std::forward_iterator_tag,
+                                  const DIEValue> iterator_adaptor;
+
+  public:
+    const_iterator() = default;
+    const_iterator(DIEValueList::iterator X) : iterator_adaptor(X.wrapped()) {}
+    explicit const_iterator(ListTy::const_iterator X) : iterator_adaptor(X) {}
+
+    explicit operator bool() const { return bool(wrapped()); }
+    const DIEValue &operator*() const { return wrapped()->V; }
+  };
+
+  iterator insert(BumpPtrAllocator &Alloc, DIEValue V) {
+    List.push_back(*new (Alloc) Node(V));
+    return iterator(ListTy::toIterator(List.back()));
+  }
+  template <class... Ts>
+  iterator emplace(BumpPtrAllocator &Alloc, Ts &&... Args) {
+    return insert(Alloc, DIEValue(std::forward<Ts>(Args)...));
+  }
+
+  iterator begin() { return iterator(List.begin()); }
+  iterator end() { return iterator(List.end()); }
+  const_iterator begin() const { return const_iterator(List.begin()); }
+  const_iterator end() const { return const_iterator(List.end()); }
+};
+
 //===--------------------------------------------------------------------===//
 /// DIE - A structured debug information entry.  Has an abbreviation which
 /// describes its organization.
-class DIE {
+class DIE : IntrusiveBackListNode {
+  friend class IntrusiveBackList<DIE>;
+
 protected:
   /// Offset - Offset in debug info section.
   ///
@@ -468,27 +639,24 @@ protected:
   dwarf::Tag Tag = (dwarf::Tag)0;
 
   /// Children DIEs.
-  ///
-  // This can't be a vector<DIE> because pointer validity is requirent for the
-  // Parent pointer and DIEEntry.
-  // It can't be a list<DIE> because some clients need pointer validity before
-  // the object has been added to any child list
-  // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may
-  // be more convoluted than beneficial.
-  std::vector<std::unique_ptr<DIE>> Children;
+  IntrusiveBackList<DIE> Children;
 
-  DIE *Parent;
+  DIE *Parent = nullptr;
 
   /// Attribute values.
   ///
-  SmallVector<DIEValue, 12> Values;
+  DIEValueList Values;
 
 protected:
-  DIE() : Offset(0), Size(0), Parent(nullptr) {}
+  DIE() : Offset(0), Size(0) {}
+
+private:
+  explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag) {}
 
 public:
-  explicit DIE(dwarf::Tag Tag)
-      : Offset(0), Size(0), Tag(Tag), Parent(nullptr) {}
+  static DIE *get(BumpPtrAllocator &Alloc, dwarf::Tag Tag) {
+    return new (Alloc) DIE(Tag);
+  }
 
   // Accessors.
   unsigned getAbbrevNumber() const { return AbbrevNumber; }
@@ -497,26 +665,32 @@ public:
   unsigned getSize() const { return Size; }
   bool hasChildren() const { return !Children.empty(); }
 
-  typedef std::vector<std::unique_ptr<DIE>>::const_iterator child_iterator;
+  typedef IntrusiveBackList<DIE>::iterator child_iterator;
+  typedef IntrusiveBackList<DIE>::const_iterator const_child_iterator;
   typedef iterator_range<child_iterator> child_range;
+  typedef iterator_range<const_child_iterator> const_child_range;
 
-  child_range children() const {
+  child_range children() {
+    return llvm::make_range(Children.begin(), Children.end());
+  }
+  const_child_range children() const {
     return llvm::make_range(Children.begin(), Children.end());
   }
 
-  typedef SmallVectorImpl<DIEValue>::const_iterator value_iterator;
+  typedef DIEValueList::iterator value_iterator;
   typedef iterator_range<value_iterator> value_range;
 
-  value_iterator values_begin() const { return Values.begin(); }
-  value_iterator values_end() const { return Values.end(); }
-  value_range values() const {
-    return llvm::make_range(values_begin(), values_end());
+  value_range values() {
+    return llvm::make_range(Values.begin(), Values.end());
   }
 
-  void setValue(unsigned I, DIEValue New) {
-    assert(I < Values.size());
-    Values[I] = New;
+  typedef DIEValueList::const_iterator const_value_iterator;
+  typedef iterator_range<const_value_iterator> const_value_range;
+
+  const_value_range values() const {
+    return llvm::make_range(Values.begin(), Values.end());
   }
+
   DIE *getParent() const { return Parent; }
 
   /// Generate the abbreviation for this DIE.
@@ -539,19 +713,21 @@ public:
 
   /// addValue - Add a value and attributes to a DIE.
   ///
-  void addValue(DIEValue Value) { Values.push_back(Value); }
+  value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue Value) {
+    return Values.insert(Alloc, Value);
+  }
   template <class T>
-  void addValue(dwarf::Attribute Attribute, dwarf::Form Form, T &&Value) {
-    Values.emplace_back(Attribute, Form, std::forward<T>(Value));
+  value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
+                          dwarf::Form Form, T &&Value) {
+    return Values.emplace(Alloc, Attribute, Form, std::forward<T>(Value));
   }
 
-  /// addChild - Add a child to the DIE.
-  ///
-  DIE &addChild(std::unique_ptr<DIE> Child) {
-    assert(!Child->getParent());
+  /// Add a child to the DIE.
+  DIE &addChild(DIE *Child) {
+    assert(!Child->getParent() && "Child should be orphaned");
     Child->Parent = this;
-    Children.push_back(std::move(Child));
-    return *Children.back();
+    Children.push_back(*Child);
+    return Children.back();
   }
 
   /// Find a value in the DIE with the attribute given.
@@ -635,6 +811,6 @@ public:
 #endif
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 1dca2ce..f04a7cd 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -69,7 +69,7 @@ public:
     unsigned NumFixedArgs;
     CallingConv::ID CallConv;
     const Value *Callee;
-    const char *SymName;
+    MCSymbol *Symbol;
     ArgListTy Args;
     ImmutableCallSite *CS;
     MachineInstr *Call;
@@ -88,7 +88,7 @@ public:
         : RetTy(nullptr), RetSExt(false), RetZExt(false), IsVarArg(false),
           IsInReg(false), DoesNotReturn(false), IsReturnValueUsed(true),
           IsTailCall(false), NumFixedArgs(-1), CallConv(CallingConv::C),
-          Callee(nullptr), SymName(nullptr), CS(nullptr), Call(nullptr),
+          Callee(nullptr), Symbol(nullptr), CS(nullptr), Call(nullptr),
           ResultReg(0), NumResultRegs(0), IsPatchPoint(false) {}
 
     CallLoweringInfo &setCallee(Type *ResultTy, FunctionType *FuncTy,
@@ -114,12 +114,12 @@ public:
     }
 
     CallLoweringInfo &setCallee(Type *ResultTy, FunctionType *FuncTy,
-                                const char *Target, ArgListTy &&ArgsList,
+                                MCSymbol *Target, ArgListTy &&ArgsList,
                                 ImmutableCallSite &Call,
                                 unsigned FixedArgs = ~0U) {
       RetTy = ResultTy;
       Callee = Call.getCalledValue();
-      SymName = Target;
+      Symbol = Target;
 
       IsInReg = Call.paramHasAttr(0, Attribute::InReg);
       DoesNotReturn = Call.doesNotReturn();
@@ -148,11 +148,16 @@ public:
       return *this;
     }
 
-    CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultTy,
+    CallLoweringInfo &setCallee(const DataLayout &DL, MCContext &Ctx,
+                                CallingConv::ID CC, Type *ResultTy,
                                 const char *Target, ArgListTy &&ArgsList,
+                                unsigned FixedArgs = ~0U);
+
+    CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultTy,
+                                MCSymbol *Target, ArgListTy &&ArgsList,
                                 unsigned FixedArgs = ~0U) {
       RetTy = ResultTy;
-      SymName = Target;
+      Symbol = Target;
       CallConv = CC;
       Args = std::move(ArgsList);
       NumFixedArgs = (FixedArgs == ~0U) ? Args.size() : FixedArgs;
@@ -504,7 +509,9 @@ protected:
 
   CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) const;
 
-  bool lowerCallTo(const CallInst *CI, const char *SymName, unsigned NumArgs);
+  bool lowerCallTo(const CallInst *CI, MCSymbol *Symbol, unsigned NumArgs);
+  bool lowerCallTo(const CallInst *CI, const char *SymbolName,
+                   unsigned NumArgs);
   bool lowerCallTo(CallLoweringInfo &CLI);
 
   bool isCommutativeIntrinsic(IntrinsicInst const *II) {
diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h
index d5c2fee..f4b6463 100644
--- a/include/llvm/CodeGen/FaultMaps.h
+++ b/include/llvm/CodeGen/FaultMaps.h
@@ -1,4 +1,4 @@
-//===------------------- FaultMaps.h - StackMaps ----------------*- C++ -*-===//
+//===------------------- FaultMaps.h - The "FaultMaps" section --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,6 +12,8 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Format.h"
 
 #include <vector>
 #include <map>
@@ -68,6 +70,151 @@ private:
 
   void emitFunctionInfo(const MCSymbol *FnLabel, const FunctionFaultInfos &FFI);
 };
+
+/// A parser for the __llvm_faultmaps section generated by the FaultMaps class
+/// above.  This parser is version locked with with the __llvm_faultmaps section
+/// generated by the version of LLVM that includes it.  No guarantees are made
+/// with respect to forward or backward compatibility.
+class FaultMapParser {
+  typedef uint8_t FaultMapVersionType;
+  static const size_t FaultMapVersionOffset = 0;
+
+  typedef uint8_t Reserved0Type;
+  static const size_t Reserved0Offset =
+      FaultMapVersionOffset + sizeof(FaultMapVersionType);
+
+  typedef uint16_t Reserved1Type;
+  static const size_t Reserved1Offset = Reserved0Offset + sizeof(Reserved0Type);
+
+  typedef uint32_t NumFunctionsType;
+  static const size_t NumFunctionsOffset =
+      Reserved1Offset + sizeof(Reserved1Type);
+
+  static const size_t FunctionInfosOffset =
+      NumFunctionsOffset + sizeof(NumFunctionsType);
+
+  const uint8_t *P;
+  const uint8_t *E;
+
+  template <typename T> static T read(const uint8_t *P, const uint8_t *E) {
+    assert(P + sizeof(T) <= E && "out of bounds read!");
+    return support::endian::read<T, support::little, 1>(P);
+  }
+
+public:
+  class FunctionFaultInfoAccessor {
+    typedef uint32_t FaultKindType;
+    static const size_t FaultKindOffset = 0;
+
+    typedef uint32_t FaultingPCOffsetType;
+    static const size_t FaultingPCOffsetOffset =
+        FaultKindOffset + sizeof(FaultKindType);
+
+    typedef uint32_t HandlerPCOffsetType;
+    static const size_t HandlerPCOffsetOffset =
+        FaultingPCOffsetOffset + sizeof(FaultingPCOffsetType);
+
+    const uint8_t *P;
+    const uint8_t *E;
+
+  public:
+    static const size_t Size =
+        HandlerPCOffsetOffset + sizeof(HandlerPCOffsetType);
+
+    explicit FunctionFaultInfoAccessor(const uint8_t *P, const uint8_t *E)
+        : P(P), E(E) {}
+
+    FaultKindType getFaultKind() const {
+      return read<FaultKindType>(P + FaultKindOffset, E);
+    }
+
+    FaultingPCOffsetType getFaultingPCOffset() const {
+      return read<FaultingPCOffsetType>(P + FaultingPCOffsetOffset, E);
+    }
+
+    HandlerPCOffsetType getHandlerPCOffset() const {
+      return read<HandlerPCOffsetType>(P + HandlerPCOffsetOffset, E);
+    }
+  };
+
+  class FunctionInfoAccessor {
+    typedef uint64_t FunctionAddrType;
+    static const size_t FunctionAddrOffset = 0;
+
+    typedef uint32_t NumFaultingPCsType;
+    static const size_t NumFaultingPCsOffset =
+        FunctionAddrOffset + sizeof(FunctionAddrType);
+
+    typedef uint32_t ReservedType;
+    static const size_t ReservedOffset =
+        NumFaultingPCsOffset + sizeof(NumFaultingPCsType);
+
+    static const size_t FunctionFaultInfosOffset =
+        ReservedOffset + sizeof(ReservedType);
+
+    static const size_t FunctionInfoHeaderSize = FunctionFaultInfosOffset;
+
+    const uint8_t *P;
+    const uint8_t *E;
+
+  public:
+    FunctionInfoAccessor() : P(nullptr), E(nullptr) {}
+
+    explicit FunctionInfoAccessor(const uint8_t *P, const uint8_t *E)
+        : P(P), E(E) {}
+
+    FunctionAddrType getFunctionAddr() const {
+      return read<FunctionAddrType>(P + FunctionAddrOffset, E);
+    }
+
+    NumFaultingPCsType getNumFaultingPCs() const {
+      return read<NumFaultingPCsType>(P + NumFaultingPCsOffset, E);
+    }
+
+    FunctionFaultInfoAccessor getFunctionFaultInfoAt(uint32_t Index) const {
+      assert(Index < getNumFaultingPCs() && "index out of bounds!");
+      const uint8_t *Begin = P + FunctionFaultInfosOffset +
+                             FunctionFaultInfoAccessor::Size * Index;
+      return FunctionFaultInfoAccessor(Begin, E);
+    }
+
+    FunctionInfoAccessor getNextFunctionInfo() const {
+      size_t MySize = FunctionInfoHeaderSize +
+                      getNumFaultingPCs() * FunctionFaultInfoAccessor::Size;
+
+      const uint8_t *Begin = P + MySize;
+      assert(Begin < E && "out of bounds!");
+      return FunctionInfoAccessor(Begin, E);
+    }
+  };
+
+  explicit FaultMapParser(const uint8_t *Begin, const uint8_t *End)
+      : P(Begin), E(End) {}
+
+  FaultMapVersionType getFaultMapVersion() const {
+    auto Version = read<FaultMapVersionType>(P + FaultMapVersionOffset, E);
+    assert(Version == 1 && "only version 1 supported!");
+    return Version;
+  }
+
+  NumFunctionsType getNumFunctions() const {
+    return read<NumFunctionsType>(P + NumFunctionsOffset, E);
+  }
+
+  FunctionInfoAccessor getFirstFunctionInfo() const {
+    const uint8_t *Begin = P + FunctionInfosOffset;
+    return FunctionInfoAccessor(Begin, E);
+  }
+};
+
+raw_ostream &
+operator<<(raw_ostream &OS, const FaultMapParser::FunctionFaultInfoAccessor &);
+
+raw_ostream &operator<<(raw_ostream &OS,
+                        const FaultMapParser::FunctionInfoAccessor &);
+
+raw_ostream &operator<<(raw_ostream &OS, const FaultMapParser &);
+
 } // namespace llvm
 
 #endif
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index b34f67a..e883bd1 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -201,6 +201,6 @@ public:
   /// will soon change.
   GCFunctionInfo &getFunctionInfo(const Function &F);
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h
index e451cd2..2208470 100644
--- a/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -59,6 +59,6 @@ public:
 
   virtual ~GCMetadataPrinter();
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index 2a4dabb..a1b8e89 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -172,6 +172,6 @@ public:
 /// register your GCMetadataPrinter subclass with the
 /// GCMetadataPrinterRegistery as well.
 typedef Registry<GCStrategy> GCRegistry;
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/GCs.h b/include/llvm/CodeGen/GCs.h
index 5418fff..5207f80 100644
--- a/include/llvm/CodeGen/GCs.h
+++ b/include/llvm/CodeGen/GCs.h
@@ -41,6 +41,6 @@ void linkErlangGCPrinter();
 void linkShadowStackGC();
 
 void linkStatepointExampleGC();
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 5a1cf59..c7237fd 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -124,6 +124,8 @@ namespace ISD {
     TargetExternalSymbol,
     TargetBlockAddress,
 
+    MCSymbol,
+
     /// TargetIndex - Like a constant pool entry, but with completely
     /// target-dependent semantics. Holds target flags, a 32-bit index, and a
     /// 64-bit index. Targets can use this however they like.
@@ -890,8 +892,8 @@ namespace ISD {
     CVT_INVALID /// Marker - Invalid opcode
   };
 
-} // namespace ISD
+} // end llvm::ISD namespace
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h
index a764645..9e6ab7d 100644
--- a/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/include/llvm/CodeGen/IntrinsicLowering.h
@@ -54,6 +54,6 @@ namespace llvm {
     /// simple integer bswap.
     static bool LowerToByteSwap(CallInst *CI);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index cc33f34..f347f66 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -93,6 +93,6 @@ private:
     void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
     SUnit *getSingleUnscheduledPred(SUnit *SU);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index 7478c3a..7d7e48a 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -252,6 +252,6 @@ private:
   LexicalScope *CurrentFnLexicalScope;
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index ea44ab1..9b8b91c 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -866,5 +866,5 @@ namespace llvm {
 
   };
 
-} // namespace llvm
+}
 #endif
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 9d68841..9673f80 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -444,6 +444,6 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
 
     class HMEditor;
   };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h
index 6ffd3ee..6475e7b 100644
--- a/include/llvm/CodeGen/LivePhysRegs.h
+++ b/include/llvm/CodeGen/LivePhysRegs.h
@@ -116,19 +116,15 @@ public:
   void stepForward(const MachineInstr &MI,
         SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers);
 
-  /// \brief Adds all live-in registers of basic block @p MBB.
-  void addLiveIns(const MachineBasicBlock *MBB) {
-    for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
-         LE = MBB->livein_end(); LI != LE; ++LI)
-      addReg(*LI);
-  }
-
-  /// \brief Adds all live-out registers of basic block @p MBB.
-  void addLiveOuts(const MachineBasicBlock *MBB) {
-    for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
-         SE = MBB->succ_end(); SI != SE; ++SI)
-      addLiveIns(*SI);
-  }
+  /// \brief Adds all live-in registers of basic block @p MBB; After prologue/
+  /// epilogue insertion \p AddPristines should be set to true to insert the
+  /// pristine registers.
+  void addLiveIns(const MachineBasicBlock *MBB, bool AddPristines = false);
+
+  /// \brief Adds all live-out registers of basic block @p MBB; After prologue/
+  /// epilogue insertion \p AddPristines should be set to true to insert the
+  /// pristine registers.
+  void addLiveOuts(const MachineBasicBlock *MBB, bool AddPristines = false);
 
   typedef SparseSet<unsigned>::const_iterator const_iterator;
   const_iterator begin() const { return LiveRegs.begin(); }
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index f04efc3..c97c636 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -228,6 +228,6 @@ public:
                                 const MachineBlockFrequencyInfo&);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h
index b4808ab..f495507 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -95,6 +95,6 @@ namespace llvm {
     /// print - Implement the dump method.
     void print(raw_ostream &O, const Module* = nullptr) const override;
   };
-} // namespace llvm
+}
 
 #endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 334e8c5..55b97dc 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -306,6 +306,6 @@ public:
   void setPHIJoin(unsigned Reg) { PHIJoins.set(Reg); }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h
index b1fe47a..a6ffeb3 100644
--- a/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/include/llvm/CodeGen/MIRYamlMapping.h
@@ -25,22 +25,84 @@
 namespace llvm {
 namespace yaml {
 
+/// A wrapper around std::string which contains a source range that's being
+/// set during parsing.
+struct StringValue {
+  std::string Value;
+  SMRange SourceRange;
+
+  StringValue() {}
+  StringValue(std::string Value) : Value(std::move(Value)) {}
+
+  bool operator==(const StringValue &Other) const {
+    return Value == Other.Value;
+  }
+};
+
+template <> struct ScalarTraits<StringValue> {
+  static void output(const StringValue &S, void *, llvm::raw_ostream &OS) {
+    OS << S.Value;
+  }
+
+  static StringRef input(StringRef Scalar, void *Ctx, StringValue &S) {
+    S.Value = Scalar.str();
+    if (const auto *Node =
+            reinterpret_cast<yaml::Input *>(Ctx)->getCurrentNode())
+      S.SourceRange = Node->getSourceRange();
+    return "";
+  }
+
+  static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); }
+};
+
+struct FlowStringValue : StringValue {
+  FlowStringValue() {}
+  FlowStringValue(std::string Value) : StringValue(Value) {}
+};
+
+template <> struct ScalarTraits<FlowStringValue> {
+  static void output(const FlowStringValue &S, void *, llvm::raw_ostream &OS) {
+    return ScalarTraits<StringValue>::output(S, nullptr, OS);
+  }
+
+  static StringRef input(StringRef Scalar, void *Ctx, FlowStringValue &S) {
+    return ScalarTraits<StringValue>::input(Scalar, Ctx, S);
+  }
+
+  static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::StringValue)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::FlowStringValue)
+
+namespace llvm {
+namespace yaml {
+
 struct MachineBasicBlock {
+  unsigned ID;
   std::string Name;
   unsigned Alignment = 0;
   bool IsLandingPad = false;
   bool AddressTaken = false;
-  // TODO: Serialize the successors and liveins.
-  // TODO: Serialize machine instructions.
+  // TODO: Serialize the successor weights and liveins.
+  std::vector<FlowStringValue> Successors;
+
+  std::vector<StringValue> Instructions;
 };
 
 template <> struct MappingTraits<MachineBasicBlock> {
   static void mapping(IO &YamlIO, MachineBasicBlock &MBB) {
+    YamlIO.mapRequired("id", MBB.ID);
     YamlIO.mapOptional("name", MBB.Name,
                        std::string()); // Don't print out an empty name.
     YamlIO.mapOptional("alignment", MBB.Alignment);
     YamlIO.mapOptional("isLandingPad", MBB.IsLandingPad);
     YamlIO.mapOptional("addressTaken", MBB.AddressTaken);
+    YamlIO.mapOptional("successors", MBB.Successors);
+    YamlIO.mapOptional("instructions", MBB.Instructions);
   }
 };
 
@@ -57,6 +119,13 @@ struct MachineFunction {
   unsigned Alignment = 0;
   bool ExposesReturnsTwice = false;
   bool HasInlineAsm = false;
+  // Register information
+  bool IsSSA = false;
+  bool TracksRegLiveness = false;
+  bool TracksSubRegLiveness = false;
+  // TODO: Serialize virtual register definitions.
+  // TODO: Serialize the various register masks.
+  // TODO: Serialize live in registers.
 
   std::vector<MachineBasicBlock> BasicBlocks;
 };
@@ -67,6 +136,9 @@ template <> struct MappingTraits<MachineFunction> {
     YamlIO.mapOptional("alignment", MF.Alignment);
     YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice);
     YamlIO.mapOptional("hasInlineAsm", MF.HasInlineAsm);
+    YamlIO.mapOptional("isSSA", MF.IsSSA);
+    YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness);
+    YamlIO.mapOptional("tracksSubRegLiveness", MF.TracksSubRegLiveness);
     YamlIO.mapOptional("body", MF.BasicBlocks);
   }
 };
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 619894c..5e5f45c 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -461,16 +461,27 @@ public:
   /// instruction of this basic block. If a terminator does not exist,
   /// it returns end()
   iterator getFirstTerminator();
-  const_iterator getFirstTerminator() const;
+  const_iterator getFirstTerminator() const {
+    return const_cast<MachineBasicBlock *>(this)->getFirstTerminator();
+  }
 
   /// getFirstInstrTerminator - Same getFirstTerminator but it ignores bundles
   /// and return an instr_iterator instead.
   instr_iterator getFirstInstrTerminator();
 
+  /// getFirstNonDebugInstr - returns an iterator to the first non-debug
+  /// instruction in the basic block, or end()
+  iterator getFirstNonDebugInstr();
+  const_iterator getFirstNonDebugInstr() const {
+    return const_cast<MachineBasicBlock *>(this)->getFirstNonDebugInstr();
+  }
+
   /// getLastNonDebugInstr - returns an iterator to the last non-debug
   /// instruction in the basic block, or end()
   iterator getLastNonDebugInstr();
-  const_iterator getLastNonDebugInstr() const;
+  const_iterator getLastNonDebugInstr() const {
+    return const_cast<MachineBasicBlock *>(this)->getLastNonDebugInstr();
+  }
 
   /// SplitCriticalEdge - Split the critical edge from this block to the
   /// given successor block, and return the newly created block, or null
@@ -649,6 +660,8 @@ public:
   // Debugging methods.
   void dump() const;
   void print(raw_ostream &OS, SlotIndexes* = nullptr) const;
+  void print(raw_ostream &OS, ModuleSlotTracker &MST,
+             SlotIndexes * = nullptr) const;
 
   // Printing method used by LoopInfo.
   void printAsOperand(raw_ostream &OS, bool PrintType = true) const;
@@ -801,6 +814,6 @@ public:
   MachineBasicBlock::iterator getInitial() { return I; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 9d0a069..feb394e 100644
--- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -66,6 +66,6 @@ public:
 
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index da6ea1d..7ba7495 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -84,7 +84,7 @@ public:
                                     const MachineBasicBlock *Dst) const;
 };
 
-} // namespace llvm
+}
 
 
 #endif
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index 8a915fb..c619afb 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -174,6 +174,6 @@ public:
   void dump() const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h
index f8dd2cd..4131194 100644
--- a/include/llvm/CodeGen/MachineDominanceFrontier.h
+++ b/include/llvm/CodeGen/MachineDominanceFrontier.h
@@ -104,6 +104,6 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index 6518114..4428fa6 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -270,6 +270,6 @@ template <> struct GraphTraits<MachineDominatorTree*>
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index ac92a4b..0f5a4b1 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -600,6 +600,6 @@ public:
   void dump(const MachineFunction &MF) const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index d838cad..94610ca 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -546,6 +546,6 @@ template <> struct GraphTraits<Inverse<const MachineFunction*> > :
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 576e72b..4c0f5e6 100644
--- a/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -50,6 +50,6 @@ private:
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h
index 0e09c90..50a1f6e 100644
--- a/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/include/llvm/CodeGen/MachineFunctionPass.h
@@ -54,6 +54,6 @@ private:
   bool runOnFunction(Function &F) override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 0313e93..de7e0a2 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -1105,6 +1105,8 @@ public:
   // Debugging support
   //
   void print(raw_ostream &OS, bool SkipOpers = false) const;
+  void print(raw_ostream &OS, ModuleSlotTracker &MST,
+             bool SkipOpers = false) const;
   void dump() const;
 
   //===--------------------------------------------------------------------===//
@@ -1235,6 +1237,6 @@ inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) {
   return OS;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 0778ff4..4f68f38 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -40,7 +40,7 @@ namespace RegState {
     ImplicitDefine = Implicit | Define,
     ImplicitKill   = Implicit | Kill
   };
-} // namespace RegState
+}
 
 class MachineInstrBuilder {
   MachineFunction *MF;
@@ -185,8 +185,9 @@ public:
     return *this;
   }
 
-  const MachineInstrBuilder &addSym(MCSymbol *Sym) const {
-    MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym));
+  const MachineInstrBuilder &addSym(MCSymbol *Sym,
+                                    unsigned char TargetFlags = 0) const {
+    MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym, TargetFlags));
     return *this;
   }
 
@@ -502,6 +503,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h
index edebfa6..1220224 100644
--- a/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/include/llvm/CodeGen/MachineInstrBundle.h
@@ -247,6 +247,6 @@ public:
   const MachineOperand *operator->() const { return &deref(); }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index b59b585..adcd1d0 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -125,6 +125,6 @@ public:
   void dump() const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index 8c245ae..438ef2e 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -186,6 +186,6 @@ template <> struct GraphTraits<MachineLoop*> {
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 9962ff9..a73b92f 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -27,6 +27,7 @@ namespace llvm {
 class FoldingSetNodeID;
 class MDNode;
 class raw_ostream;
+class ModuleSlotTracker;
 
 /// MachinePointerInfo - This class contains a discriminated union of
 /// information about pointers in memory operands, relating them back to LLVM IR
@@ -200,6 +201,12 @@ public:
   ///
   void Profile(FoldingSetNodeID &ID) const;
 
+  /// Support for operator<<.
+  /// @{
+  void print(raw_ostream &OS) const;
+  void print(raw_ostream &OS, ModuleSlotTracker &MST) const;
+  /// @}
+
   friend bool operator==(const MachineMemOperand &LHS,
                          const MachineMemOperand &RHS) {
     return LHS.getValue() == RHS.getValue() &&
@@ -219,8 +226,11 @@ public:
   }
 };
 
-raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO);
+inline raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO) {
+  MRO.print(OS);
+  return OS;
+}
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 5faf8de..ccaa83a 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -284,12 +284,14 @@ public:
   /// getAddrLabelSymbol - Return the symbol to be used for the specified basic
   /// block when its address is taken.  This cannot be its normal LBB label
   /// because the block may be accessed outside its containing function.
-  MCSymbol *getAddrLabelSymbol(const BasicBlock *BB);
+  MCSymbol *getAddrLabelSymbol(const BasicBlock *BB) {
+    return getAddrLabelSymbolToEmit(BB).front();
+  }
 
   /// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
   /// basic block when its address is taken.  If other blocks were RAUW'd to
   /// this one, we may have to emit them as well, return the whole set.
-  std::vector<MCSymbol*> getAddrLabelSymbolToEmit(const BasicBlock *BB);
+  ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
 
   /// takeDeletedSymbolsForFunction - If the specified function has had any
   /// references to address-taken blocks generated, but the block got deleted,
@@ -441,6 +443,6 @@ public:
 
 }; // End class MachineModuleInfo
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 8c8ce71..c43e47c 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -27,6 +27,7 @@ class MachineBasicBlock;
 class MachineInstr;
 class MachineRegisterInfo;
 class MDNode;
+class ModuleSlotTracker;
 class TargetMachine;
 class TargetRegisterInfo;
 class hash_code;
@@ -218,6 +219,8 @@ public:
   void clearParent() { ParentMI = nullptr; }
 
   void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr) const;
+  void print(raw_ostream &os, ModuleSlotTracker &MST,
+             const TargetRegisterInfo *TRI = nullptr) const;
 
   //===--------------------------------------------------------------------===//
   // Accessors that tell you what kind of MachineOperand you're looking at.
@@ -450,11 +453,12 @@ public:
     return Contents.CFIIndex;
   }
 
-  /// getOffset - Return the offset from the symbol in this operand. This always
-  /// returns 0 for ExternalSymbol operands.
+  /// Return the offset from the symbol in this operand. This always returns 0
+  /// for ExternalSymbol operands.
   int64_t getOffset() const {
-    assert((isGlobal() || isSymbol() || isCPI() || isTargetIndex() ||
-            isBlockAddress()) && "Wrong MachineOperand accessor");
+    assert((isGlobal() || isSymbol() || isMCSymbol() || isCPI() ||
+            isTargetIndex() || isBlockAddress()) &&
+           "Wrong MachineOperand accessor");
     return int64_t(uint64_t(Contents.OffsetedInfo.OffsetHi) << 32) |
            SmallContents.OffsetLo;
   }
@@ -512,8 +516,9 @@ public:
   }
 
   void setOffset(int64_t Offset) {
-    assert((isGlobal() || isSymbol() || isCPI() || isTargetIndex() ||
-            isBlockAddress()) && "Wrong MachineOperand accessor");
+    assert((isGlobal() || isSymbol() || isMCSymbol() || isCPI() ||
+            isTargetIndex() || isBlockAddress()) &&
+           "Wrong MachineOperand accessor");
     SmallContents.OffsetLo = unsigned(Offset);
     Contents.OffsetedInfo.OffsetHi = int(Offset >> 32);
   }
@@ -703,9 +708,12 @@ public:
     return Op;
   }
 
-  static MachineOperand CreateMCSymbol(MCSymbol *Sym) {
+  static MachineOperand CreateMCSymbol(MCSymbol *Sym,
+                                       unsigned char TargetFlags = 0) {
     MachineOperand Op(MachineOperand::MO_MCSymbol);
     Op.Contents.Sym = Sym;
+    Op.setOffset(0);
+    Op.setTargetFlags(TargetFlags);
     return Op;
   }
 
@@ -741,6 +749,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) {
   // See friend declaration above. This additional declaration is required in
   // order to compile LLVM with IBM xlC compiler.
   hash_code hash_value(const MachineOperand &MO);
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h
index 794f1d6..cf49c29 100644
--- a/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/include/llvm/CodeGen/MachineRegionInfo.h
@@ -176,6 +176,6 @@ EXTERN_TEMPLATE_INSTANTIATION(class RegionBase<RegionTraits<MachineFunction>>);
 EXTERN_TEMPLATE_INSTANTIATION(class RegionNodeBase<RegionTraits<MachineFunction>>);
 EXTERN_TEMPLATE_INSTANTIATION(class RegionInfoBase<RegionTraits<MachineFunction>>);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index c17ad38..e5b837a 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -1036,6 +1036,6 @@ getPressureSets(unsigned RegUnit) const {
   return PSetIterator(RegUnit, this);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h
index dad0c46..5f988ad 100644
--- a/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -111,6 +111,6 @@ private:
   MachineSSAUpdater(const MachineSSAUpdater&) = delete;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h
index a3eea5b..a728df3 100644
--- a/include/llvm/CodeGen/MachineValueType.h
+++ b/include/llvm/CodeGen/MachineValueType.h
@@ -644,6 +644,6 @@ class MVT {
     /// @}
   };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h
index 832c043..833b9ba 100644
--- a/include/llvm/CodeGen/PBQPRAConstraint.h
+++ b/include/llvm/CodeGen/PBQPRAConstraint.h
@@ -64,6 +64,6 @@ private:
   void anchor() override;
 };
 
-} // namespace llvm
+}
 
 #endif /* LLVM_CODEGEN_PBQPRACONSTRAINT_H */
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 3aeec2a..538c995 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -637,7 +637,12 @@ namespace llvm {
   /// createForwardControlFlowIntegrityPass - This pass adds control-flow
   /// integrity.
   ModulePass *createForwardControlFlowIntegrityPass();
-} // namespace llvm
+
+  /// InterleavedAccess Pass - This pass identifies and matches interleaved
+  /// memory accesses to target specific intrinsics.
+  ///
+  FunctionPass *createInterleavedAccessPass(const TargetMachine *TM);
+} // End llvm namespace
 
 /// Target machine pass initializer for passes with dependencies. Use with
 /// INITIALIZE_TM_PASS_END.
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index e0ec72f..a518b62 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -29,8 +29,7 @@ namespace llvm {
   /// space), or constant pool.
   class PseudoSourceValue {
   private:
-    friend raw_ostream &llvm::operator<<(raw_ostream &OS,
-                                         const MachineMemOperand &MMO);
+    friend class MachineMemOperand; // For printCustom().
 
     /// printCustom - Implement printing for PseudoSourceValue. This is called
     /// from Value::print or Value's operator<<.
@@ -106,6 +105,6 @@ namespace llvm {
 
     int getFrameIndex() const { return FI; }
   };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index b2e31fa..df3fd34 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -184,6 +184,6 @@ private:
 
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h
index d1ea9ff..0097e04 100644
--- a/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -131,6 +131,6 @@ private:
     unsigned numberRCValPredInSU (SUnit *SU, unsigned RCId);
     unsigned numberRCValSuccInSU (SUnit *SU, unsigned RCId);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 34adde5..2be5de6 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -429,7 +429,7 @@ namespace RTLIB {
   /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getATOMIC(unsigned Opc, MVT VT);
-} // namespace RTLIB
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 9b5d59c..8391314 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -748,6 +748,6 @@ namespace llvm {
     reverse_iterator rend() { return Index2Node.rend(); }
     const_reverse_iterator rend() const { return Index2Node.rend(); }
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index ef872a2..8a40e72 100644
--- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -106,6 +106,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index 5911cfb..ab14c2d 100644
--- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -121,6 +121,6 @@ public:
   void RecedeCycle() override;
 };
 
-} // namespace llvm
+}
 
 #endif //!LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index aa50dea..c2b1243 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -495,6 +495,8 @@ public:
   SDValue getExternalSymbol(const char *Sym, SDLoc dl, EVT VT);
   SDValue getTargetExternalSymbol(const char *Sym, EVT VT,
                                   unsigned char TargetFlags = 0);
+  SDValue getMCSymbol(MCSymbol *Sym, EVT VT);
+
   SDValue getValueType(EVT);
   SDValue getRegister(unsigned Reg, EVT VT);
   SDValue getRegisterMask(const uint32_t *RegMask);
@@ -1278,6 +1280,7 @@ private:
   StringMap<SDNode*> ExternalSymbols;
 
   std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
+  DenseMap<MCSymbol *, SDNode *> MCSymbols;
 };
 
 template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index dc4fa2b..a011e4c 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -301,6 +301,6 @@ private:
 
 };
 
-} // namespace llvm
+}
 
 #endif /* LLVM_CODEGEN_SELECTIONDAGISEL_H */
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 4b65eaa..6191190 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -89,7 +89,7 @@ namespace ISD {
   /// Return true if the node has at least one operand
   /// and all operands of the specified node are ISD::UNDEF.
   bool allOperandsUndef(const SDNode *N);
-} // namespace ISD
+}  // end llvm:ISD namespace
 
 //===----------------------------------------------------------------------===//
 /// Unlike LLVM values, Selection DAG nodes may return multiple
@@ -579,6 +579,23 @@ public:
   op_iterator op_end() const { return OperandList+NumOperands; }
   ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
 
+  /// Iterator for directly iterating over the operand SDValue's.
+  struct value_op_iterator
+      : iterator_adaptor_base<value_op_iterator, op_iterator,
+                              std::random_access_iterator_tag, SDValue,
+                              ptrdiff_t, value_op_iterator *,
+                              value_op_iterator *> {
+    explicit value_op_iterator(SDUse *U = nullptr)
+      : iterator_adaptor_base(U) {}
+
+    const SDValue &operator*() const { return I->get(); }
+  };
+
+  iterator_range<value_op_iterator> op_values() const {
+    return iterator_range<value_op_iterator>(value_op_iterator(op_begin()),
+                                             value_op_iterator(op_end()));
+  }
+
   SDVTList getVTList() const {
     SDVTList X = { ValueList, NumValues };
     return X;
@@ -1810,6 +1827,21 @@ public:
   }
 };
 
+class MCSymbolSDNode : public SDNode {
+  MCSymbol *Symbol;
+
+  friend class SelectionDAG;
+  MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
+      : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}
+
+public:
+  MCSymbol *getMCSymbol() const { return Symbol; }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::MCSymbol;
+  }
+};
+
 class CondCodeSDNode : public SDNode {
   ISD::CondCode Condition;
   friend class SelectionDAG;
@@ -2268,8 +2300,8 @@ namespace ISD {
     return isa<StoreSDNode>(N) &&
       cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
   }
-} // namespace ISD
+}
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 5f21397..9d6d6f5 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -705,6 +705,6 @@ namespace llvm {
   struct IntervalMapInfo<SlotIndex> : IntervalMapHalfOpenInfo<SlotIndex> {
   };
 
-} // namespace llvm
+}
 
 #endif // LLVM_CODEGEN_SLOTINDEXES_H
diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h
index ba27404..46a773f 100644
--- a/include/llvm/CodeGen/StackMaps.h
+++ b/include/llvm/CodeGen/StackMaps.h
@@ -255,6 +255,6 @@ private:
   void debug() { print(dbgs()); }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 9a1b63f..10c099d 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -90,10 +90,6 @@ public:
   ~TargetLoweringObjectFileMachO() override {}
   TargetLoweringObjectFileMachO();
 
-  /// Extract the dependent library name from a linker option string. Returns
-  /// StringRef() if the option does not specify a library.
-  StringRef getDepLibFromLinkerOpt(StringRef LinkerOption) const override;
-
   /// Emit the module flags that specify the garbage collection information.
   void emitModuleFlags(MCStreamer &Streamer,
                        ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
@@ -150,10 +146,6 @@ public:
   MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang,
                                     const TargetMachine &TM) const override;
 
-  /// Extract the dependent library name from a linker option string. Returns
-  /// StringRef() if the option does not specify a library.
-  StringRef getDepLibFromLinkerOpt(StringRef LinkerOption) const override;
-
   /// Emit Obj-C garbage collection and linker options. Only linker option
   /// emission is implemented for COFF.
   void emitModuleFlags(MCStreamer &Streamer,
@@ -164,6 +156,9 @@ public:
                                   const MCSymbol *KeySym) const override;
   MCSection *getStaticDtorSection(unsigned Priority,
                                   const MCSymbol *KeySym) const override;
+
+  void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV,
+                                const Mangler &Mang) const override;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index e02d7db..e1a9fd3 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -361,6 +361,6 @@ namespace llvm {
     unsigned getExtendedSizeInBits() const;
   };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index 5b771d0..d7e9209 100644
--- a/include/llvm/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -185,6 +185,6 @@ namespace llvm {
     VRM.print(OS);
     return OS;
   }
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index 5c1b3df..291f390 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -161,5 +161,5 @@ struct WinEHFuncInfo {
 void calculateWinCXXEHStateNumbers(const Function *ParentFn,
                                    WinEHFuncInfo &FuncInfo);
 
-} // namespace llvm
+}
 #endif // LLVM_CODEGEN_WINEHFUNCINFO_H
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 1712e58..b9fd450 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -125,9 +125,6 @@
 /* Define if you have the libdl library or equivalent. */
 #cmakedefine HAVE_LIBDL ${HAVE_LIBDL}
 
-/* Define to 1 if you have the `imagehlp' library (-limagehlp). */
-#cmakedefine HAVE_LIBIMAGEHLP ${HAVE_LIBIMAGEHLP}
-
 /* Define to 1 if you have the `m' library (-lm). */
 #undef HAVE_LIBM
 
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 49d1b1f..0970649 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -137,9 +137,6 @@
 /* Define if libedit is available on this platform. */
 #undef HAVE_LIBEDIT
 
-/* Define to 1 if you have the `imagehlp' library (-limagehlp). */
-#undef HAVE_LIBIMAGEHLP
-
 /* Define to 1 if you have the `m' library (-lm). */
 #undef HAVE_LIBM
 
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index 8e5794d..871e60c 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -172,6 +172,6 @@ public:
   virtual std::unique_ptr<LoadedObjectInfo> clone() const = 0;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index 72f304a..6ab5d5c 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -57,6 +57,6 @@ private:
   AttributeSpecVector AttributeSpecs;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index f891438..47dbf5f 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -49,6 +49,6 @@ public:
   void dump(raw_ostream &OS) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index 9f7527f..743f9c6 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -26,6 +26,6 @@ public:
   ~DWARFCompileUnit() override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 0e29ad6..423c0d3 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -295,6 +295,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 88519ce..2114208 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -58,6 +58,6 @@ private:
   void clear();
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 15850b2..837a8e6 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -65,6 +65,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index 58359fa..791f010 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -82,6 +82,6 @@ private:
   DenseSet<uint32_t> ParsedCUOffsets;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index 3cbae41..f29d5fe 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -155,6 +155,6 @@ struct DWARFDebugInfoEntryInlinedChain {
   const DWARFUnit *U;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index e728d59..93e7c79 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -248,6 +248,6 @@ private:
   LineTableMapTy LineTableMap;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index 6a3f2ad..bd44c2e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -76,6 +76,6 @@ public:
   void parse(DataExtractor data);
   void dump(raw_ostream &OS) const;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 2d6bb0e..7ddcc0d 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -91,6 +91,6 @@ private:
   void dumpString(raw_ostream &OS, const DWARFUnit *U) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index de853c3..f24e278 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -32,7 +32,7 @@ protected:
   bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) override;
 };
 
-} // namespace llvm
+}
 
 #endif
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 54209cf..5604b93 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -279,6 +279,6 @@ private:
   const DWARFDebugInfoEntryMinimal *getSubprogramForAddress(uint64_t Address);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index 8a06d55..b5fa8c3 100644
--- a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -54,6 +54,6 @@ private:
 
   std::unique_ptr<IPDBEnumSymbols> Enumerator;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
index 429cd7e..808a0f3 100644
--- a/include/llvm/DebugInfo/PDB/IPDBDataStream.h
+++ b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
@@ -32,6 +32,6 @@ public:
   virtual void reset() = 0;
   virtual IPDBDataStream *clone() const = 0;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index 5001a95..645ac96 100644
--- a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -28,6 +28,6 @@ public:
   virtual void reset() = 0;
   virtual MyType *clone() const = 0;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/IPDBLineNumber.h b/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
index 30036df..92cd58d 100644
--- a/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
+++ b/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
@@ -31,6 +31,6 @@ public:
   virtual uint32_t getCompilandId() const = 0;
   virtual bool isStatement() const = 0;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/IPDBSession.h b/include/llvm/DebugInfo/PDB/IPDBSession.h
index 1dca911..a130a38 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSession.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSession.h
@@ -56,6 +56,6 @@ public:
 
   virtual std::unique_ptr<IPDBEnumDataStreams> getDebugStreams() const = 0;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/IPDBSourceFile.h b/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
index 8081ea5..55000ef 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
@@ -32,6 +32,6 @@ public:
   virtual PDB_Checksum getChecksumType() const = 0;
   virtual std::unique_ptr<IPDBEnumSymbols> getCompilands() const = 0;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/PDBContext.h b/include/llvm/DebugInfo/PDB/PDBContext.h
index 3b4a77e..2bb9746 100644
--- a/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -55,6 +55,6 @@ private:
   std::string getFunctionName(uint64_t Address, DINameKind NameKind) const;
   std::unique_ptr<IPDBSession> Session;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/PDBExtras.h b/include/llvm/DebugInfo/PDB/PDBExtras.h
index 64f9694..48ce1c1 100644
--- a/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -33,6 +33,6 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_UniqueId &Id);
 raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
 raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
 raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/PDBSymDumper.h b/include/llvm/DebugInfo/PDB/PDBSymDumper.h
index ffd31a5..65110f3 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymDumper.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymDumper.h
@@ -56,6 +56,6 @@ public:
 private:
   bool RequireImpl;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index bd85e60..c055dd7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -34,6 +34,6 @@ public:
   // FORWARD_SYMBOL_METHOD(getValue)
   FORWARD_SYMBOL_METHOD(getVirtualAddress)
 };
-} // namespace llvm
+}
 
 #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLANNOTATION_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index 6782179..2ca1250 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -36,6 +36,6 @@ public:
   FORWARD_SYMBOL_METHOD(getSymIndexId)
   FORWARD_SYMBOL_METHOD(getVirtualAddress)
 };
-} // namespace llvm
+}
 
 #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLBLOCK_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h b/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
index d92830f..f8c796a 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
@@ -33,6 +33,6 @@ public:
   FORWARD_SYMBOL_METHOD(getSourceFileName)
   FORWARD_SYMBOL_METHOD(getSymIndexId)
 };
-} // namespace llvm
+}
 
 #endif // LLVM_DEBUGINFO_PDB_PDBSYMBOLCOMPILAND_H
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index f86490b..e8af601 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -634,6 +634,6 @@ public:
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h
index ea5ddfc..0e92f79 100644
--- a/include/llvm/ExecutionEngine/GenericValue.h
+++ b/include/llvm/ExecutionEngine/GenericValue.h
@@ -49,5 +49,5 @@ struct GenericValue {
 inline GenericValue PTOGV(void *P) { return GenericValue(P); }
 inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; }
 
-} // namespace llvm
+} // End llvm namespace.
 #endif
diff --git a/include/llvm/ExecutionEngine/MCJIT.h b/include/llvm/ExecutionEngine/MCJIT.h
index 294f11d..66ddb7c 100644
--- a/include/llvm/ExecutionEngine/MCJIT.h
+++ b/include/llvm/ExecutionEngine/MCJIT.h
@@ -33,6 +33,6 @@ namespace {
       LLVMLinkInMCJIT();
     }
   } ForceMCJITLinking;
-} // namespace
+}
 
 #endif
diff --git a/include/llvm/ExecutionEngine/ObjectCache.h b/include/llvm/ExecutionEngine/ObjectCache.h
index 1084de8..cc01a4e 100644
--- a/include/llvm/ExecutionEngine/ObjectCache.h
+++ b/include/llvm/ExecutionEngine/ObjectCache.h
@@ -35,6 +35,6 @@ public:
   virtual std::unique_ptr<MemoryBuffer> getObject(const Module* M) = 0;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 4c515db..9694b80 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -241,11 +241,10 @@ private:
   }
 
   static std::string Mangle(StringRef Name, const DataLayout &DL) {
-    Mangler M(&DL);
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      M.getNameWithPrefix(MangledNameStream, Name);
+      Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
     }
     return MangledName;
   }
diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index 71c83f7..93ba02b 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -193,7 +193,7 @@ private:
       auto Symbols = llvm::make_unique<StringMap<const GlobalValue*>>();
 
       for (const auto &M : Ms) {
-        Mangler Mang(&M->getDataLayout());
+        Mangler Mang;
 
         for (const auto &V : M->globals())
           if (auto GV = addGlobalValue(*Symbols, V, Mang, SearchName,
diff --git a/include/llvm/ExecutionEngine/Orc/NullResolver.h b/include/llvm/ExecutionEngine/Orc/NullResolver.h
new file mode 100644
index 0000000..1560c6d
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/NullResolver.h
@@ -0,0 +1,36 @@
+//===------ NullResolver.h - Reject symbol lookup requests ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//   Defines a RuntimeDyld::SymbolResolver subclass that rejects all symbol
+// resolution requests, for clients that have no cross-object fixups.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H
+#define LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H
+
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+
+namespace llvm {
+namespace orc {
+
+/// SymbolResolver impliementation that rejects all resolution requests.
+/// Useful for clients that have no cross-object fixups.
+class NullResolver : public RuntimeDyld::SymbolResolver {
+public:
+  RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) final;
+
+  RuntimeDyld::SymbolInfo
+  findSymbolInLogicalDylib(const std::string &Name) final;
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
new file mode 100644
index 0000000..7af6620
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -0,0 +1,112 @@
+//===- ObjectTransformLayer.h - Run all objects through functor -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Run all objects passed in through a user supplied functor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_OBJECTTRANSFORMLAYER_H
+#define LLVM_EXECUTIONENGINE_ORC_OBJECTTRANSFORMLAYER_H
+
+#include "JITSymbol.h"
+
+namespace llvm {
+namespace orc {
+
+/// @brief Object mutating layer.
+///
+///   This layer accepts sets of ObjectFiles (via addObjectSet). It
+/// immediately applies the user supplied functor to each object, then adds
+/// the set of transformed objects to the layer below.
+template <typename BaseLayerT, typename TransformFtor>
+class ObjectTransformLayer {
+public:
+  /// @brief Handle to a set of added objects.
+  typedef typename BaseLayerT::ObjSetHandleT ObjSetHandleT;
+
+  /// @brief Construct an ObjectTransformLayer with the given BaseLayer
+  ObjectTransformLayer(BaseLayerT &BaseLayer,
+                       TransformFtor Transform = TransformFtor())
+      : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
+  /// @brief Apply the transform functor to each object in the object set, then
+  ///        add the resulting set of objects to the base layer, along with the
+  ///        memory manager and symbol resolver.
+  ///
+  /// @return A handle for the added objects.
+  template <typename ObjSetT, typename MemoryManagerPtrT,
+            typename SymbolResolverPtrT>
+  ObjSetHandleT addObjectSet(ObjSetT &Objects, MemoryManagerPtrT MemMgr,
+                             SymbolResolverPtrT Resolver) {
+
+    for (auto I = Objects.begin(), E = Objects.end(); I != E; ++I)
+      *I = Transform(std::move(*I));
+
+    return BaseLayer.addObjectSet(Objects, std::move(MemMgr),
+                                  std::move(Resolver));
+  }
+
+  /// @brief Remove the object set associated with the handle H.
+  void removeObjectSet(ObjSetHandleT H) { BaseLayer.removeObjectSet(H); }
+
+  /// @brief Search for the given named symbol.
+  /// @param Name The name of the symbol to search for.
+  /// @param ExportedSymbolsOnly If true, search only for exported symbols.
+  /// @return A handle for the given named symbol, if it exists.
+  JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+    return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
+  }
+
+  /// @brief Get the address of the given symbol in the context of the set of
+  ///        objects represented by the handle H. This call is forwarded to the
+  ///        base layer's implementation.
+  /// @param H The handle for the object set to search in.
+  /// @param Name The name of the symbol to search for.
+  /// @param ExportedSymbolsOnly If true, search only for exported symbols.
+  /// @return A handle for the given named symbol, if it is found in the
+  ///         given object set.
+  JITSymbol findSymbolIn(ObjSetHandleT H, const std::string &Name,
+                         bool ExportedSymbolsOnly) {
+    return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly);
+  }
+
+  /// @brief Immediately emit and finalize the object set represented by the
+  ///        given handle.
+  /// @param H Handle for object set to emit/finalize.
+  void emitAndFinalize(ObjSetHandleT H) { BaseLayer.emitAndFinalize(H); }
+
+  /// @brief Map section addresses for the objects associated with the handle H.
+  void mapSectionAddress(ObjSetHandleT H, const void *LocalAddress,
+                         TargetAddress TargetAddr) {
+    BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr);
+  }
+
+  // Ownership hack.
+  // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without
+  //        referencing the original object.
+  template <typename OwningMBSet>
+  void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) {
+    BaseLayer.takeOwnershipOfBuffers(H, std::move(MBs));
+  }
+
+  /// @brief Access the transform functor directly.
+  TransformFtor &getTransform() { return Transform; }
+
+  /// @brief Access the mumate functor directly.
+  const TransformFtor &getTransform() const { return Transform; }
+
+private:
+  BaseLayerT &BaseLayer;
+  TransformFtor Transform;
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_OBJECTTRANSFORMLAYER_H
diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h
index bbf9968..0b0dcb0 100644
--- a/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -100,7 +100,7 @@ private:
   MemoryGroup RODataMem;
 };
 
-} // namespace llvm
+}
 
 #endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
 
diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h
index 12c8df5..fc04fe7 100644
--- a/include/llvm/IR/Argument.h
+++ b/include/llvm/IR/Argument.h
@@ -131,6 +131,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/AssemblyAnnotationWriter.h b/include/llvm/IR/AssemblyAnnotationWriter.h
index 1ae3018..19e32a2 100644
--- a/include/llvm/IR/AssemblyAnnotationWriter.h
+++ b/include/llvm/IR/AssemblyAnnotationWriter.h
@@ -58,6 +58,6 @@ public:
   virtual void printInfoComment(const Value &, formatted_raw_ostream &) {}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 1d92d18..366bf70 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -575,6 +575,6 @@ AttrBuilder typeIncompatible(const Type *Ty);
 
 } // end AttributeFuncs namespace
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h
index 9ecabec6..a4b3c41 100644
--- a/include/llvm/IR/AutoUpgrade.h
+++ b/include/llvm/IR/AutoUpgrade.h
@@ -66,6 +66,6 @@ namespace llvm {
 
   /// Upgrade a metadata string constant in place.
   void UpgradeMDStringConstant(std::string &String);
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index b0fad4f..66581bf 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -346,6 +346,6 @@ inline BasicBlock *ilist_traits<BasicBlock>::createSentinel() const {
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/CFG.h b/include/llvm/IR/CFG.h
index e6e21b4..f78220a 100644
--- a/include/llvm/IR/CFG.h
+++ b/include/llvm/IR/CFG.h
@@ -396,6 +396,6 @@ template <> struct GraphTraits<Inverse<const Function*> > :
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index 0270caa..dd2903e 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -38,6 +38,7 @@ class CallInst;
 class InvokeInst;
 
 template <typename FunTy = const Function,
+          typename BBTy = const BasicBlock,
           typename ValTy = const Value,
           typename UserTy = const User,
           typename InstrTy = const Instruction,
@@ -82,6 +83,9 @@ public:
   InstrTy *operator->() const { return I.getPointer(); }
   explicit operator bool() const { return I.getPointer(); }
 
+  /// Get the basic block containing the call site
+  BBTy* getParent() const { return getInstruction()->getParent(); }
+
   /// getCalledValue - Return the pointer to function that is being called.
   ///
   ValTy *getCalledValue() const {
@@ -189,6 +193,20 @@ public:
   else                                   \
     cast<InvokeInst>(II)->METHOD
 
+  unsigned getNumArgOperands() const {
+    CALLSITE_DELEGATE_GETTER(getNumArgOperands());
+  }
+
+  ValTy *getArgOperand(unsigned i) const { 
+    CALLSITE_DELEGATE_GETTER(getArgOperand(i));
+  }
+
+  bool isInlineAsm() const { 
+    if (isCall())
+      return cast<CallInst>(getInstruction())->isInlineAsm();
+    return false;
+  }
+
   /// getCallingConv/setCallingConv - get or set the calling convention of the
   /// call.
   CallingConv::ID getCallingConv() const {
@@ -366,8 +384,9 @@ private:
   }
 };
 
-class CallSite : public CallSiteBase<Function, Value, User, Instruction,
-                                     CallInst, InvokeInst, User::op_iterator> {
+class CallSite : public CallSiteBase<Function, BasicBlock, Value, User,
+                                     Instruction, CallInst, InvokeInst,
+                                     User::op_iterator> {
 public:
   CallSite() {}
   CallSite(CallSiteBase B) : CallSiteBase(B) {}
@@ -397,6 +416,6 @@ public:
   ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
index 846e58c..9872e6e 100644
--- a/include/llvm/IR/CallingConv.h
+++ b/include/llvm/IR/CallingConv.h
@@ -146,8 +146,8 @@ namespace CallingConv {
     /// in SSE registers.
     X86_VectorCall = 80
   };
-} // namespace CallingConv
+} // End CallingConv namespace
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Comdat.h b/include/llvm/IR/Comdat.h
index 50b11be..4d4c15f 100644
--- a/include/llvm/IR/Comdat.h
+++ b/include/llvm/IR/Comdat.h
@@ -61,6 +61,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Comdat &C) {
   return OS;
 }
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h
index 7db09d0..019b434 100644
--- a/include/llvm/IR/Constant.h
+++ b/include/llvm/IR/Constant.h
@@ -47,9 +47,6 @@ protected:
   Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
     : User(ty, vty, Ops, NumOps) {}
 
-  void destroyConstantImpl();
-  void replaceUsesOfWithOnConstantImpl(Constant *Replacement);
-
 public:
   /// isNullValue - Return true if this is the value that would be returned by
   /// getNullValue.
@@ -126,14 +123,14 @@ public:
   /// vector of constant integers, all equal, and the common value is returned.
   const APInt &getUniqueInteger() const;
 
-  /// destroyConstant - Called if some element of this constant is no longer
-  /// valid.  At this point only other constants may be on the use_list for this
+  /// Called if some element of this constant is no longer valid.
+  /// At this point only other constants may be on the use_list for this
   /// constant.  Any constants on our Use list must also be destroy'd.  The
   /// implementation must be sure to remove the constant from the list of
-  /// available cached constants.  Implementations should call
-  /// destroyConstantImpl as the last thing they do, to destroy all users and
-  /// delete this.
-  virtual void destroyConstant() { llvm_unreachable("Not reached!"); }
+  /// available cached constants.  Implementations should implement
+  /// destroyConstantImpl to remove constants from any pools/maps they are
+  /// contained it.
+  void destroyConstant();
 
   //// Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Value *V) {
@@ -141,8 +138,8 @@ public:
            V->getValueID() <= ConstantLastVal;
   }
 
-  /// replaceUsesOfWithOnConstant - This method is a special form of
-  /// User::replaceUsesOfWith (which does not work on constants) that does work
+  /// This method is a special form of User::replaceUsesOfWith
+  /// (which does not work on constants) that does work
   /// on constants.  Basically this method goes through the trouble of building
   /// a new constant that is equivalent to the current one, with all uses of
   /// From replaced with uses of To.  After this construction is completed, all
@@ -151,15 +148,7 @@ public:
   /// use Value::replaceAllUsesWith, which automatically dispatches to this
   /// method as needed.
   ///
-  virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
-    // Provide a default implementation for constants (like integers) that
-    // cannot use any other values.  This cannot be called at runtime, but needs
-    // to be here to avoid link errors.
-    assert(getNumOperands() == 0 && "replaceUsesOfWithOnConstant must be "
-           "implemented for all constants that have operands!");
-    llvm_unreachable("Constants that do not have operands cannot be using "
-                     "'From'!");
-  }
+  void handleOperandChange(Value *, Value *, Use *);
 
   static Constant *getNullValue(Type* Ty);
 
@@ -187,6 +176,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/ConstantFolder.h b/include/llvm/IR/ConstantFolder.h
index 4e87cd0..fb6ca3b 100644
--- a/include/llvm/IR/ConstantFolder.h
+++ b/include/llvm/IR/ConstantFolder.h
@@ -240,6 +240,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h
index 8a7488e..9ded3ca 100644
--- a/include/llvm/IR/ConstantRange.h
+++ b/include/llvm/IR/ConstantRange.h
@@ -273,6 +273,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ConstantRange &CR) {
   return OS;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index b2ef77b..0c7a84f 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -50,6 +50,11 @@ class ConstantInt : public Constant {
   ConstantInt(const ConstantInt &) = delete;
   ConstantInt(IntegerType *Ty, const APInt& V);
   APInt Val;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   // allocate space for exactly zero operands
   void *operator new(size_t s) {
@@ -231,6 +236,11 @@ class ConstantFP : public Constant {
   void *operator new(size_t, unsigned) = delete;
   ConstantFP(const ConstantFP &) = delete;
   friend class LLVMContextImpl;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   ConstantFP(Type *Ty, const APFloat& V);
 protected:
@@ -297,6 +307,11 @@ public:
 class ConstantAggregateZero : public Constant {
   void *operator new(size_t, unsigned) = delete;
   ConstantAggregateZero(const ConstantAggregateZero &) = delete;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   explicit ConstantAggregateZero(Type *ty)
     : Constant(ty, ConstantAggregateZeroVal, nullptr, 0) {}
@@ -308,8 +323,6 @@ protected:
 public:
   static ConstantAggregateZero *get(Type *Ty);
 
-  void destroyConstant() override;
-
   /// getSequentialElement - If this CAZ has array or vector type, return a zero
   /// with the right element type.
   Constant *getSequentialElement() const;
@@ -343,6 +356,11 @@ public:
 class ConstantArray : public Constant {
   friend struct ConstantAggrKeyType<ConstantArray>;
   ConstantArray(const ConstantArray &) = delete;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   ConstantArray(ArrayType *T, ArrayRef<Constant *> Val);
 public:
@@ -363,9 +381,6 @@ public:
     return cast<ArrayType>(Value::getType());
   }
 
-  void destroyConstant() override;
-  void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) override;
-
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Value *V) {
     return V->getValueID() == ConstantArrayVal;
@@ -385,6 +400,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantArray, Constant)
 class ConstantStruct : public Constant {
   friend struct ConstantAggrKeyType<ConstantStruct>;
   ConstantStruct(const ConstantStruct &) = delete;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   ConstantStruct(StructType *T, ArrayRef<Constant *> Val);
 public:
@@ -421,9 +441,6 @@ public:
     return cast<StructType>(Value::getType());
   }
 
-  void destroyConstant() override;
-  void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) override;
-
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Value *V) {
     return V->getValueID() == ConstantStructVal;
@@ -444,6 +461,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantStruct, Constant)
 class ConstantVector : public Constant {
   friend struct ConstantAggrKeyType<ConstantVector>;
   ConstantVector(const ConstantVector &) = delete;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   ConstantVector(VectorType *T, ArrayRef<Constant *> Val);
 public:
@@ -472,9 +494,6 @@ public:
   /// elements have the same value, return that value. Otherwise return NULL.
   Constant *getSplatValue() const;
 
-  void destroyConstant() override;
-  void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) override;
-
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Value *V) {
     return V->getValueID() == ConstantVectorVal;
@@ -494,6 +513,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantVector, Constant)
 class ConstantPointerNull : public Constant {
   void *operator new(size_t, unsigned) = delete;
   ConstantPointerNull(const ConstantPointerNull &) = delete;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   explicit ConstantPointerNull(PointerType *T)
     : Constant(T,
@@ -508,8 +532,6 @@ public:
   /// get() - Static factory methods - Return objects of the specified value
   static ConstantPointerNull *get(PointerType *T);
 
-  void destroyConstant() override;
-
   /// getType - Specialize the getType() method to always return an PointerType,
   /// which reduces the amount of casting needed in parts of the compiler.
   ///
@@ -545,6 +567,11 @@ class ConstantDataSequential : public Constant {
   ConstantDataSequential *Next;
   void *operator new(size_t, unsigned) = delete;
   ConstantDataSequential(const ConstantDataSequential &) = delete;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
     : Constant(ty, VT, nullptr, 0), DataElements(Data), Next(nullptr) {}
@@ -635,8 +662,6 @@ public:
   /// host endianness of the data elements.
   StringRef getRawDataValues() const;
 
-  void destroyConstant() override;
-
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   ///
   static bool classof(const Value *V) {
@@ -778,6 +803,11 @@ class BlockAddress : public Constant {
   void *operator new(size_t, unsigned) = delete;
   void *operator new(size_t s) { return User::operator new(s, 2); }
   BlockAddress(Function *F, BasicBlock *BB);
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 public:
   /// get - Return a BlockAddress for the specified function and basic block.
   static BlockAddress *get(Function *F, BasicBlock *BB);
@@ -798,9 +828,6 @@ public:
   Function *getFunction() const { return (Function*)Op<0>().get(); }
   BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }
 
-  void destroyConstant() override;
-  void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) override;
-
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Value *V) {
     return V->getValueID() == BlockAddressVal;
@@ -825,6 +852,10 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value)
 class ConstantExpr : public Constant {
   friend struct ConstantExprKeyType;
 
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   ConstantExpr(Type *ty, unsigned Opcode, Use *Ops, unsigned NumOps)
     : Constant(ty, ConstantExprVal, Ops, NumOps) {
@@ -1156,9 +1187,6 @@ public:
   /// would make it harder to remove ConstantExprs altogether.
   Instruction *getAsInstruction();
 
-  void destroyConstant() override;
-  void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) override;
-
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Value *V) {
     return V->getValueID() == ConstantExprVal;
@@ -1192,6 +1220,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant)
 class UndefValue : public Constant {
   void *operator new(size_t, unsigned) = delete;
   UndefValue(const UndefValue &) = delete;
+
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   explicit UndefValue(Type *T) : Constant(T, UndefValueVal, nullptr, 0) {}
 protected:
@@ -1224,14 +1257,12 @@ public:
   /// \brief Return the number of elements in the array, vector, or struct.
   unsigned getNumElements() const;
 
-  void destroyConstant() override;
-
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Value *V) {
     return V->getValueID() == UndefValueVal;
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index 9971131..d6296b6 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -36,14 +36,9 @@ namespace llvm {
     Module &M;
     LLVMContext &VMContext;
 
-    TempMDTuple TempEnumTypes;
-    TempMDTuple TempRetainTypes;
-    TempMDTuple TempSubprograms;
-    TempMDTuple TempGVs;
-    TempMDTuple TempImportedModules;
-
-    Function *DeclareFn;     // llvm.dbg.declare
-    Function *ValueFn;       // llvm.dbg.value
+    DICompileUnit *CUNode;   ///< The one compile unit created by this DIBuiler.
+    Function *DeclareFn;     ///< llvm.dbg.declare
+    Function *ValueFn;       ///< llvm.dbg.value
 
     SmallVector<Metadata *, 4> AllEnumTypes;
     /// Track the RetainTypes, since they can be updated later on.
@@ -566,6 +561,20 @@ namespace llvm {
     DINamespace *createNameSpace(DIScope *Scope, StringRef Name, DIFile *File,
                                  unsigned LineNo);
 
+    /// createModule - This creates new descriptor for a module
+    /// with the specified parent scope.
+    /// @param Scope       Parent scope
+    /// @param Name        Name of this module
+    /// @param ConfigurationMacros
+    ///                    A space-separated shell-quoted list of -D macro
+    ///                    definitions as they would appear on a command line.
+    /// @param IncludePath The path to the module map file.
+    /// @param ISysRoot    The clang system root (value of -isysroot).
+    DIModule *createModule(DIScope *Scope, StringRef Name,
+                           StringRef ConfigurationMacros,
+                           StringRef IncludePath,
+                           StringRef ISysRoot);
+
     /// createLexicalBlockFile - This creates a descriptor for a lexical
     /// block with a new file attached. This merely extends the existing
     /// lexical block as it crosses a file.
@@ -598,6 +607,13 @@ namespace llvm {
     DIImportedEntity *createImportedModule(DIScope *Context,
                                            DIImportedEntity *NS, unsigned Line);
 
+    /// \brief Create a descriptor for an imported module.
+    /// @param Context The scope this module is imported into
+    /// @param M The module being imported here
+    /// @param Line Line number
+    DIImportedEntity *createImportedModule(DIScope *Context, DIModule *M,
+                                           unsigned Line);
+
     /// \brief Create a descriptor for an imported function.
     /// @param Context The scope this module is imported into
     /// @param Decl The declaration (or definition) of a function, type, or
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index 81cf665..892d6c9 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -222,7 +222,9 @@ public:
   /// This representation is in the same format accepted by the string
   /// constructor above. This should not be used to compare two DataLayout as
   /// different string can represent the same layout.
-  std::string getStringRepresentation() const { return StringRepresentation; }
+  const std::string &getStringRepresentation() const {
+    return StringRepresentation;
+  }
 
   /// \brief Test if the DataLayout was constructed from an empty string.
   bool isDefault() const { return StringRepresentation.empty(); }
@@ -542,6 +544,6 @@ inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
   }
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index 03dd901..5c99300 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -218,6 +218,7 @@ public:
     case DILocalVariableKind:
     case DIObjCPropertyKind:
     case DIImportedEntityKind:
+    case DIModuleKind:
       return true;
     }
   }
@@ -443,6 +444,7 @@ public:
     case DILexicalBlockKind:
     case DILexicalBlockFileKind:
     case DINamespaceKind:
+    case DIModuleKind:
       return true;
     }
   }
@@ -1083,12 +1085,21 @@ public:
   /// deleted on a uniquing collision.  In practice, uniquing collisions on \a
   /// DICompileUnit should be fairly rare.
   /// @{
+  void replaceEnumTypes(DISubprogramArray N) {
+    replaceOperandWith(4, N.get());
+  }
+  void replaceRetainedTypes(DISubprogramArray N) {
+    replaceOperandWith(5, N.get());
+  }
   void replaceSubprograms(DISubprogramArray N) {
     replaceOperandWith(6, N.get());
   }
   void replaceGlobalVariables(DIGlobalVariableArray N) {
     replaceOperandWith(7, N.get());
   }
+  void replaceImportedEntities(DIGlobalVariableArray N) {
+    replaceOperandWith(8, N.get());
+  }
   /// @}
 
   static bool classof(const Metadata *MD) {
@@ -1623,6 +1634,66 @@ public:
   }
 };
 
+/// \brief A (clang) module that has been imported by the compile unit.
+///
+class DIModule : public DIScope {
+  friend class LLVMContextImpl;
+  friend class MDNode;
+
+  DIModule(LLVMContext &Context, StorageType Storage, ArrayRef<Metadata *> Ops)
+      : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops) {}
+  ~DIModule() {}
+
+  static DIModule *getImpl(LLVMContext &Context, DIScope *Scope,
+                           StringRef Name, StringRef ConfigurationMacros,
+                           StringRef IncludePath, StringRef ISysRoot,
+                           StorageType Storage, bool ShouldCreate = true) {
+    return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
+                   getCanonicalMDString(Context, ConfigurationMacros),
+		   getCanonicalMDString(Context, IncludePath),
+		   getCanonicalMDString(Context, ISysRoot),
+                   Storage, ShouldCreate);
+  }
+  static DIModule *getImpl(LLVMContext &Context, Metadata *Scope,
+                           MDString *Name, MDString *ConfigurationMacros,
+			   MDString *IncludePath, MDString *ISysRoot,
+			   StorageType Storage, bool ShouldCreate = true);
+
+  TempDIModule cloneImpl() const {
+    return getTemporary(getContext(), getScope(), getName(),
+                        getConfigurationMacros(), getIncludePath(),
+                        getISysRoot());
+  }
+
+public:
+  DEFINE_MDNODE_GET(DIModule, (DIScope *Scope, StringRef Name,
+			       StringRef ConfigurationMacros, StringRef IncludePath,
+			       StringRef ISysRoot),
+                    (Scope, Name, ConfigurationMacros, IncludePath, ISysRoot))
+  DEFINE_MDNODE_GET(DIModule,
+                    (Metadata *Scope, MDString *Name, MDString *ConfigurationMacros,
+		     MDString *IncludePath, MDString *ISysRoot),
+                    (Scope, Name, ConfigurationMacros, IncludePath, ISysRoot))
+
+  TempDIModule clone() const { return cloneImpl(); }
+
+  DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
+  StringRef getName() const { return getStringOperand(1); }
+  StringRef getConfigurationMacros() const { return getStringOperand(2); }
+  StringRef getIncludePath() const { return getStringOperand(3); }
+  StringRef getISysRoot() const { return getStringOperand(4); }
+
+  Metadata *getRawScope() const { return getOperand(0); }
+  MDString *getRawName() const { return getOperandAs<MDString>(1); }
+  MDString *getRawConfigurationMacros() const { return getOperandAs<MDString>(2); }
+  MDString *getRawIncludePath() const { return getOperandAs<MDString>(3); }
+  MDString *getRawISysRoot() const { return getOperandAs<MDString>(4); }
+
+  static bool classof(const Metadata *MD) {
+    return MD->getMetadataID() == DIModuleKind;
+  }
+};
+
 /// \brief Base class for template parameters.
 class DITemplateParameter : public DINode {
 protected:
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
index 9f2671a..4a94499 100644
--- a/include/llvm/IR/DerivedTypes.h
+++ b/include/llvm/IR/DerivedTypes.h
@@ -477,6 +477,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h
index 93f5ede..c1f208e 100644
--- a/include/llvm/IR/Dominators.h
+++ b/include/llvm/IR/Dominators.h
@@ -230,6 +230,6 @@ public:
   void print(raw_ostream &OS, const Module *M = nullptr) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index f66ac0b..02ea056 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -618,6 +618,6 @@ struct OperandTraits<Function> : public OptionalOperandTraits<Function> {};
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/GVMaterializer.h b/include/llvm/IR/GVMaterializer.h
index 433de3f..1d6c915 100644
--- a/include/llvm/IR/GVMaterializer.h
+++ b/include/llvm/IR/GVMaterializer.h
@@ -59,6 +59,6 @@ public:
   virtual std::vector<StructType *> getIdentifiedStructTypes() const = 0;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h
index 2316749..ce73b7a 100644
--- a/include/llvm/IR/GlobalAlias.h
+++ b/include/llvm/IR/GlobalAlias.h
@@ -118,6 +118,6 @@ struct OperandTraits<GlobalAlias> :
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Constant)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h
index 5f58c9c..f055241 100644
--- a/include/llvm/IR/GlobalObject.h
+++ b/include/llvm/IR/GlobalObject.h
@@ -71,6 +71,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
index 5e1c5ff..f237970 100644
--- a/include/llvm/IR/GlobalValue.h
+++ b/include/llvm/IR/GlobalValue.h
@@ -90,6 +90,10 @@ private:
   // (19 + 3 + 2 + 1 + 2 + 5) == 32.
   unsigned SubClassData : GlobalValueSubClassDataBits;
 
+  friend class Constant;
+  void destroyConstantImpl();
+  Value *handleOperandChangeImpl(Value *From, Value *To, Use *U);
+
 protected:
   /// \brief The intrinsic ID for this subclass (which must be a Function).
   ///
@@ -334,9 +338,6 @@ public:
 
 /// @}
 
-  /// Override from Constant class.
-  void destroyConstant() override;
-
   /// Return true if the primary definition of this global value is outside of
   /// the current translation unit.
   bool isDeclaration() const;
@@ -367,6 +368,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index 4269a70..a015983 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -45,7 +45,6 @@ class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> {
                                                // can change from its initial
                                                // value before global
                                                // initializers are run?
-
 public:
   // allocate space for exactly one operand
   void *operator new(size_t s) {
@@ -166,10 +165,6 @@ public:
   ///
   void eraseFromParent() override;
 
-  /// Override Constant's implementation of this method so we can
-  /// replace constant initializers.
-  void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) override;
-
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Value *V) {
     return V->getValueID() == Value::GlobalVariableVal;
@@ -183,6 +178,6 @@ struct OperandTraits<GlobalVariable> :
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index 0472ec5..e6b5393 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -101,19 +101,8 @@ public:
   void SetInsertPoint(BasicBlock *TheBB, BasicBlock::iterator IP) {
     BB = TheBB;
     InsertPt = IP;
-  }
-
-  /// \brief Find the nearest point that dominates this use, and specify that
-  /// created instructions should be inserted at this point.
-  void SetInsertPoint(Use &U) {
-    Instruction *UseInst = cast<Instruction>(U.getUser());
-    if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
-      BasicBlock *PredBB = Phi->getIncomingBlock(U);
-      assert(U != PredBB->getTerminator() && "critical edge not split");
-      SetInsertPoint(PredBB, PredBB->getTerminator());
-      return;
-    }
-    SetInsertPoint(UseInst);
+    if (IP != TheBB->end())
+      SetCurrentDebugLocation(IP->getDebugLoc());
   }
 
   /// \brief Set location information used by debugging information.
@@ -550,13 +539,6 @@ public:
   explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr)
     : IRBuilderBase(IP->getContext(), FPMathTag), Folder() {
     SetInsertPoint(IP);
-    SetCurrentDebugLocation(IP->getDebugLoc());
-  }
-
-  explicit IRBuilder(Use &U, MDNode *FPMathTag = nullptr)
-    : IRBuilderBase(U->getContext(), FPMathTag), Folder() {
-    SetInsertPoint(U);
-    SetCurrentDebugLocation(cast<Instruction>(U.getUser())->getDebugLoc());
   }
 
   IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
@@ -1679,6 +1661,6 @@ public:
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef)
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/IRPrintingPasses.h b/include/llvm/IR/IRPrintingPasses.h
index 3969c83..5f1d56f 100644
--- a/include/llvm/IR/IRPrintingPasses.h
+++ b/include/llvm/IR/IRPrintingPasses.h
@@ -83,6 +83,6 @@ public:
   static StringRef name() { return "PrintFunctionPass"; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index b5174c8..08b5102 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -358,6 +358,6 @@ public:
 
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/InstIterator.h b/include/llvm/IR/InstIterator.h
index a73d489..f3ce649 100644
--- a/include/llvm/IR/InstIterator.h
+++ b/include/llvm/IR/InstIterator.h
@@ -153,6 +153,6 @@ inline iterator_range<const_inst_iterator> inst_range(const Function &F) {
   return iterator_range<const_inst_iterator>(inst_begin(F), inst_end(F));
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/InstVisitor.h b/include/llvm/IR/InstVisitor.h
index 0eb337e..581e860 100644
--- a/include/llvm/IR/InstVisitor.h
+++ b/include/llvm/IR/InstVisitor.h
@@ -284,6 +284,6 @@ private:
 
 #undef DELEGATE
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index 9df7043..b791ded 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -139,7 +139,11 @@ protected:
                  const Twine &Name, Instruction *InsertBefore);
   BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty,
                  const Twine &Name, BasicBlock *InsertAtEnd);
-  BinaryOperator *clone_impl() const override;
+
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  BinaryOperator *cloneImpl() const;
+
 public:
   // allocate space for exactly two operands
   void *operator new(size_t s) {
@@ -894,6 +898,6 @@ struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index 6fea926..6e3de1f 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -509,8 +509,10 @@ protected:
               Instruction *InsertBefore = nullptr);
   Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
               BasicBlock *InsertAtEnd);
-  virtual Instruction *clone_impl() const = 0;
 
+private:
+  /// Create a copy of this instruction.
+  Instruction *cloneImpl() const;
 };
 
 inline Instruction *ilist_traits<Instruction>::createSentinel() const {
@@ -536,6 +538,6 @@ public:
   enum { NumLowBitsAvailable = 2 };
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 369b7db..c5890f0 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -22,6 +22,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <iterator>
@@ -76,7 +77,10 @@ class AllocaInst : public UnaryInstruction {
   Type *AllocatedType;
 
 protected:
-  AllocaInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  AllocaInst *cloneImpl() const;
+
 public:
   explicit AllocaInst(Type *Ty, Value *ArraySize = nullptr,
                       const Twine &Name = "",
@@ -173,7 +177,10 @@ private:
 class LoadInst : public UnaryInstruction {
   void AssertOK();
 protected:
-  LoadInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  LoadInst *cloneImpl() const;
+
 public:
   LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore);
   LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd);
@@ -310,7 +317,10 @@ class StoreInst : public Instruction {
   void *operator new(size_t, unsigned) = delete;
   void AssertOK();
 protected:
-  StoreInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  StoreInst *cloneImpl() const;
+
 public:
   // allocate space for exactly two operands
   void *operator new(size_t s) {
@@ -436,7 +446,10 @@ class FenceInst : public Instruction {
   void *operator new(size_t, unsigned) = delete;
   void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope);
 protected:
-  FenceInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  FenceInst *cloneImpl() const;
+
 public:
   // allocate space for exactly zero operands
   void *operator new(size_t s) {
@@ -505,7 +518,10 @@ class AtomicCmpXchgInst : public Instruction {
             AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering,
             SynchronizationScope SynchScope);
 protected:
-  AtomicCmpXchgInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  AtomicCmpXchgInst *cloneImpl() const;
+
 public:
   // allocate space for exactly three operands
   void *operator new(size_t s) {
@@ -658,7 +674,10 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value)
 class AtomicRMWInst : public Instruction {
   void *operator new(size_t, unsigned) = delete;
 protected:
-  AtomicRMWInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  AtomicRMWInst *cloneImpl() const;
+
 public:
   /// This enumeration lists the possible modifications atomicrmw can make.  In
   /// the descriptions, 'p' is the pointer to the instruction's memory location,
@@ -827,7 +846,10 @@ class GetElementPtrInst : public Instruction {
                            const Twine &NameStr, BasicBlock *InsertAtEnd);
 
 protected:
-  GetElementPtrInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  GetElementPtrInst *cloneImpl() const;
+
 public:
   static GetElementPtrInst *Create(Type *PointeeType, Value *Ptr,
                                    ArrayRef<Value *> IdxList,
@@ -1078,8 +1100,11 @@ class ICmpInst: public CmpInst {
   }
 
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical ICmpInst
-  ICmpInst *clone_impl() const override;
+  ICmpInst *cloneImpl() const;
+
 public:
   /// \brief Constructor with insert-before-instruction semantics.
   ICmpInst(
@@ -1210,8 +1235,11 @@ public:
 /// \brief Represents a floating point comparison operator.
 class FCmpInst: public CmpInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical FCmpInst
-  FCmpInst *clone_impl() const override;
+  FCmpInst *cloneImpl() const;
+
 public:
   /// \brief Constructor with insert-before-instruction semantics.
   FCmpInst(
@@ -1350,7 +1378,10 @@ class CallInst : public Instruction {
                     Instruction *InsertBefore);
   CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
 protected:
-  CallInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  CallInst *cloneImpl() const;
+
 public:
   static CallInst *Create(Value *Func,
                           ArrayRef<Value *> Args,
@@ -1478,6 +1509,9 @@ public:
   /// addAttribute - adds the attribute to the list of attributes.
   void addAttribute(unsigned i, Attribute::AttrKind attr);
 
+  /// addAttribute - adds the attribute to the list of attributes.
+  void addAttribute(unsigned i, StringRef Kind, StringRef Value);
+
   /// removeAttribute - removes the attribute from the list of attributes.
   void removeAttribute(unsigned i, Attribute attr);
 
@@ -1495,6 +1529,11 @@ public:
     return hasFnAttrImpl(A);
   }
 
+  /// \brief Determine whether this call has the given attribute.
+  bool hasFnAttr(StringRef A) const {
+    return hasFnAttrImpl(A);
+  }
+
   /// \brief Determine whether the call or the callee has the given attributes.
   bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
 
@@ -1621,7 +1660,14 @@ public:
   }
 private:
 
-  bool hasFnAttrImpl(Attribute::AttrKind A) const;
+  template<typename AttrKind>
+  bool hasFnAttrImpl(AttrKind A) const {
+    if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+      return true;
+    if (const Function *F = getCalledFunction())
+      return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+    return false;
+  }
 
   // Shadow Instruction::setInstructionSubclassData with a private forwarding
   // method so that subclasses cannot accidentally use it.
@@ -1687,7 +1733,10 @@ class SelectInst : public Instruction {
     setName(NameStr);
   }
 protected:
-  SelectInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  SelectInst *cloneImpl() const;
+
 public:
   static SelectInst *Create(Value *C, Value *S1, Value *S2,
                             const Twine &NameStr = "",
@@ -1742,7 +1791,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
 ///
 class VAArgInst : public UnaryInstruction {
 protected:
-  VAArgInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  VAArgInst *cloneImpl() const;
 
 public:
   VAArgInst(Value *List, Type *Ty, const Twine &NameStr = "",
@@ -1782,7 +1833,9 @@ class ExtractElementInst : public Instruction {
   ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr,
                      BasicBlock *InsertAtEnd);
 protected:
-  ExtractElementInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  ExtractElementInst *cloneImpl() const;
 
 public:
   static ExtractElementInst *Create(Value *Vec, Value *Idx,
@@ -1843,7 +1896,9 @@ class InsertElementInst : public Instruction {
   InsertElementInst(Value *Vec, Value *NewElt, Value *Idx,
                     const Twine &NameStr, BasicBlock *InsertAtEnd);
 protected:
-  InsertElementInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  InsertElementInst *cloneImpl() const;
 
 public:
   static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
@@ -1896,7 +1951,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
 ///
 class ShuffleVectorInst : public Instruction {
 protected:
-  ShuffleVectorInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  ShuffleVectorInst *cloneImpl() const;
 
 public:
   // allocate space for exactly three operands
@@ -1997,7 +2054,9 @@ class ExtractValueInst : public UnaryInstruction {
     return User::operator new(s, 1);
   }
 protected:
-  ExtractValueInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  ExtractValueInst *cloneImpl() const;
 
 public:
   static ExtractValueInst *Create(Value *Agg,
@@ -2111,7 +2170,10 @@ class InsertValueInst : public Instruction {
   InsertValueInst(Value *Agg, Value *Val, unsigned Idx,
                   const Twine &NameStr, BasicBlock *InsertAtEnd);
 protected:
-  InsertValueInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  InsertValueInst *cloneImpl() const;
+
 public:
   // allocate space for exactly two operands
   void *operator new(size_t s) {
@@ -2252,7 +2314,10 @@ protected:
     User::allocHungoffUses(N, /* IsPhi */ true);
   }
 
-  PHINode *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  PHINode *cloneImpl() const;
+
 public:
   /// Constructors - NumReservedValues is a hint for the number of incoming
   /// edges that this phi node will have (use 0 if you really have no idea).
@@ -2445,7 +2510,10 @@ private:
                           const Twine &NameStr, BasicBlock *InsertAtEnd);
 
 protected:
-  LandingPadInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  LandingPadInst *cloneImpl() const;
+
 public:
   /// Constructors - NumReservedClauses is a hint for the number of incoming
   /// clauses that this landingpad will have (use 0 if you really have no idea).
@@ -2538,7 +2606,10 @@ private:
   ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd);
   explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd);
 protected:
-  ReturnInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  ReturnInst *cloneImpl() const;
+
 public:
   static ReturnInst* Create(LLVMContext &C, Value *retVal = nullptr,
                             Instruction *InsertBefore = nullptr) {
@@ -2610,7 +2681,10 @@ class BranchInst : public TerminatorInst {
   BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
              BasicBlock *InsertAtEnd);
 protected:
-  BranchInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  BranchInst *cloneImpl() const;
+
 public:
   static BranchInst *Create(BasicBlock *IfTrue,
                             Instruction *InsertBefore = nullptr) {
@@ -2717,7 +2791,10 @@ class SwitchInst : public TerminatorInst {
   SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
              BasicBlock *InsertAtEnd);
 protected:
-  SwitchInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  SwitchInst *cloneImpl() const;
+
 public:
 
   // -2
@@ -3022,7 +3099,10 @@ class IndirectBrInst : public TerminatorInst {
   /// autoinserts at the end of the specified BasicBlock.
   IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd);
 protected:
-  IndirectBrInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  IndirectBrInst *cloneImpl() const;
+
 public:
   static IndirectBrInst *Create(Value *Address, unsigned NumDests,
                                 Instruction *InsertBefore = nullptr) {
@@ -3129,7 +3209,10 @@ class InvokeInst : public TerminatorInst {
                     ArrayRef<Value *> Args, unsigned Values,
                     const Twine &NameStr, BasicBlock *InsertAtEnd);
 protected:
-  InvokeInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  InvokeInst *cloneImpl() const;
+
 public:
   static InvokeInst *Create(Value *Func,
                             BasicBlock *IfNormal, BasicBlock *IfException,
@@ -3424,7 +3507,10 @@ class ResumeInst : public TerminatorInst {
   explicit ResumeInst(Value *Exn, Instruction *InsertBefore=nullptr);
   ResumeInst(Value *Exn, BasicBlock *InsertAtEnd);
 protected:
-  ResumeInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  ResumeInst *cloneImpl() const;
+
 public:
   static ResumeInst *Create(Value *Exn, Instruction *InsertBefore = nullptr) {
     return new(1) ResumeInst(Exn, InsertBefore);
@@ -3473,7 +3559,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
 class UnreachableInst : public TerminatorInst {
   void *operator new(size_t, unsigned) = delete;
 protected:
-  UnreachableInst *clone_impl() const override;
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  UnreachableInst *cloneImpl() const;
 
 public:
   // allocate space for exactly zero operands
@@ -3505,8 +3593,10 @@ private:
 /// \brief This class represents a truncation of integer types.
 class TruncInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical TruncInst
-  TruncInst *clone_impl() const override;
+  TruncInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3541,8 +3631,10 @@ public:
 /// \brief This class represents zero extension of integer types.
 class ZExtInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical ZExtInst
-  ZExtInst *clone_impl() const override;
+  ZExtInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3577,8 +3669,10 @@ public:
 /// \brief This class represents a sign extension of integer types.
 class SExtInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical SExtInst
-  SExtInst *clone_impl() const override;
+  SExtInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3613,8 +3707,10 @@ public:
 /// \brief This class represents a truncation of floating point types.
 class FPTruncInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical FPTruncInst
-  FPTruncInst *clone_impl() const override;
+  FPTruncInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3649,8 +3745,10 @@ public:
 /// \brief This class represents an extension of floating point types.
 class FPExtInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical FPExtInst
-  FPExtInst *clone_impl() const override;
+  FPExtInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3685,8 +3783,10 @@ public:
 /// \brief This class represents a cast unsigned integer to floating point.
 class UIToFPInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical UIToFPInst
-  UIToFPInst *clone_impl() const override;
+  UIToFPInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3721,8 +3821,10 @@ public:
 /// \brief This class represents a cast from signed integer to floating point.
 class SIToFPInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical SIToFPInst
-  SIToFPInst *clone_impl() const override;
+  SIToFPInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3757,8 +3859,10 @@ public:
 /// \brief This class represents a cast from floating point to unsigned integer
 class FPToUIInst  : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical FPToUIInst
-  FPToUIInst *clone_impl() const override;
+  FPToUIInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3793,8 +3897,10 @@ public:
 /// \brief This class represents a cast from floating point to signed integer.
 class FPToSIInst  : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical FPToSIInst
-  FPToSIInst *clone_impl() const override;
+  FPToSIInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3845,8 +3951,10 @@ public:
     BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
   );
 
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical IntToPtrInst
-  IntToPtrInst *clone_impl() const override;
+  IntToPtrInst *cloneImpl() const;
 
   /// \brief Returns the address space of this instruction's pointer type.
   unsigned getAddressSpace() const {
@@ -3869,8 +3977,10 @@ public:
 /// \brief This class represents a cast from a pointer to an integer
 class PtrToIntInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical PtrToIntInst
-  PtrToIntInst *clone_impl() const override;
+  PtrToIntInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3917,8 +4027,10 @@ public:
 /// \brief This class represents a no-op cast from one type to another.
 class BitCastInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical BitCastInst
-  BitCastInst *clone_impl() const override;
+  BitCastInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3954,8 +4066,10 @@ public:
 /// one address space to another.
 class AddrSpaceCastInst : public CastInst {
 protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
   /// \brief Clone an identical AddrSpaceCastInst
-  AddrSpaceCastInst *clone_impl() const override;
+  AddrSpaceCastInst *cloneImpl() const;
 
 public:
   /// \brief Constructor with insert-before-instruction semantics
@@ -3983,6 +4097,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 102cbef..2c8b6eb 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -372,6 +372,6 @@ namespace llvm {
       return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
     }
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index 01781d5..43b8325 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -126,8 +126,8 @@ namespace Intrinsic {
   /// of IITDescriptors.
   void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T);
 
-} // namespace Intrinsic
+} // End Intrinsic namespace
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index ce758e2..1dff808 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -83,11 +83,9 @@ def int_arm_vcvtru    : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
 
 // Move to coprocessor
 def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
-                  MSBuiltin<"_MoveToCoprocessor">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
 def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
-                   MSBuiltin<"_MoveToCoprocessor2">,
    Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
                   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
 
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index d680085..05adc5a 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -687,6 +687,32 @@ def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">;
 // Vector divide.
 def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">;
 def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">;
+
+// Vector round-to-infinity (ceil)
+def int_ppc_vsx_xvrspip :
+      Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvrdpip :
+      Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+// Vector compare
+def int_ppc_vsx_xvcmpeqdp :
+      PowerPC_VSX_Intrinsic<"xvcmpeqdp", [llvm_v2i64_ty],
+                            [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpeqsp :
+      PowerPC_VSX_Intrinsic<"xvcmpeqsp", [llvm_v4i32_ty],
+                            [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgedp :
+      PowerPC_VSX_Intrinsic<"xvcmpgedp", [llvm_v2i64_ty],
+                            [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgesp :
+      PowerPC_VSX_Intrinsic<"xvcmpgesp", [llvm_v4i32_ty],
+                            [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgtdp :
+      PowerPC_VSX_Intrinsic<"xvcmpgtdp", [llvm_v2i64_ty],
+                            [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcmpgtsp :
+      PowerPC_VSX_Intrinsic<"xvcmpgtsp", [llvm_v4i32_ty],
+                            [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 1bed31c..b90825d 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -21,9 +21,17 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // SEH intrinsics for Windows
 let TargetPrefix = "x86" in {
   def int_x86_seh_lsda : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>;
-  def int_x86_seh_exceptioninfo : Intrinsic<[llvm_ptr_ty],
-                                            [llvm_ptr_ty, llvm_ptr_ty],
-					    [IntrReadMem]>;
+
+  // Restores the frame, base, and stack pointers as necessary after recovering
+  // from an exception. Any block resuming control flow in the parent function
+  // should call this before accessing any stack memory.
+  def int_x86_seh_restoreframe : Intrinsic<[], [], []>;
+
+  // Given a pointer to the end of an EH registration object, returns the true
+  // parent frame address that can be used with llvm.framerecover.
+  def int_x86_seh_recoverfp : Intrinsic<[llvm_ptr_ty],
+                                        [llvm_ptr_ty, llvm_ptr_ty],
+                                        [IntrNoMem]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1132,28 +1140,292 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
         Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
                   llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt_d_512:
+  def int_x86_avx512_mask_vpermi2var_d_128 : 
+       GCCBuiltin<"__builtin_ia32_vpermi2vard128_mask">,
+        Intrinsic<[llvm_v4i32_ty],
+        [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+        [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_d_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2vard256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_d_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2vard512_mask">,
+          Intrinsic<[llvm_v16i32_ty],
+          [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_hi_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varhi128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+          [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_hi_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varhi256_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+          [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_hi_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varhi512_mask">,
+          Intrinsic<[llvm_v32i16_ty],
+          [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_pd_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_pd_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_pd_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varpd512_mask">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_ps_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varps128_mask">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_ps_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varps256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_ps_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varps512_mask">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_q_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varq128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_q_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varq256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+          [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermi2var_q_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermi2varq512_mask">,
+          Intrinsic<[llvm_v8i64_ty],
+          [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_d_512:
         GCCBuiltin<"__builtin_ia32_vpermt2vard512_mask">,
         Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
                   llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt_q_512:
+  def int_x86_avx512_mask_vpermt2var_q_512:
         GCCBuiltin<"__builtin_ia32_vpermt2varq512_mask">,
         Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
                   llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt_ps_512:
+  def int_x86_avx512_mask_vpermt2var_ps_512:
         GCCBuiltin<"__builtin_ia32_vpermt2varps512_mask">,
         Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty,
                   llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
 
-  def int_x86_avx512_mask_vpermt_pd_512:
+  def int_x86_avx512_mask_vpermt2var_pd_512:
         GCCBuiltin<"__builtin_ia32_vpermt2varpd512_mask">,
         Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty,
                   llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
 
+  def int_x86_avx512_mask_vpermt2var_d_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2vard128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_d_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2vard128_maskz">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_d_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2vard256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_d_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2vard256_maskz">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_d_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2vard512_maskz">,
+          Intrinsic<[llvm_v16i32_ty],
+          [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_hi_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varhi128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+          [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_hi_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varhi128_maskz">,
+          Intrinsic<[llvm_v8i16_ty],
+          [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_hi_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varhi256_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+          [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_hi_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varhi256_maskz">,
+          Intrinsic<[llvm_v16i16_ty],
+          [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_hi_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varhi512_mask">,
+          Intrinsic<[llvm_v32i16_ty],
+          [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_hi_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varhi512_maskz">,
+          Intrinsic<[llvm_v32i16_ty],
+          [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_pd_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_pd_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varpd128_maskz">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2i64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_pd_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_pd_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varpd256_maskz">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4i64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_pd_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varpd512_maskz">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8i64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_ps_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varps128_mask">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_ps_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varps128_maskz">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4i32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_ps_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varps256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_ps_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varps256_maskz">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8i32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_ps_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varps512_maskz">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_q_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varq128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_q_128 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varq128_maskz">,
+          Intrinsic<[llvm_v2i64_ty],
+          [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpermt2var_q_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varq256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+          [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_q_256 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varq256_maskz">,
+          Intrinsic<[llvm_v4i64_ty],
+          [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vpermt2var_q_512 : 
+        GCCBuiltin<"__builtin_ia32_vpermt2varq512_maskz">,
+          Intrinsic<[llvm_v8i64_ty],
+          [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pshuf_b_128 : 
+        GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
+          Intrinsic<[llvm_v16i8_ty],
+          [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,  llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pshuf_b_256 : 
+        GCCBuiltin<"__builtin_ia32_pshufb256_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+          [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty,  llvm_i32_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pshuf_b_512 : 
+        GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
+          Intrinsic<[llvm_v64i8_ty],
+          [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty,  llvm_i64_ty],
+          [IntrNoMem]>;
 }
 
+
 // Vector blend
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
@@ -1718,12 +1990,78 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
   def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
               Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pabs_d_512 : GCCBuiltin<"__builtin_ia32_pabsd512_mask">,
-              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
-                                           llvm_i16_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_pabs_q_512 : GCCBuiltin<"__builtin_ia32_pabsq512_mask">,
-              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
-                                          llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_b_128 : 
+       GCCBuiltin<"__builtin_ia32_pabsb128_mask">,
+        Intrinsic<[llvm_v16i8_ty],
+        [llvm_v16i8_ty, llvm_v16i8_ty,  llvm_i16_ty],
+        [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_b_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsb256_mask">,
+          Intrinsic<[llvm_v32i8_ty],
+          [llvm_v32i8_ty, llvm_v32i8_ty,  llvm_i32_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_b_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsb512_mask">,
+          Intrinsic<[llvm_v64i8_ty],
+          [llvm_v64i8_ty, llvm_v64i8_ty,  llvm_i64_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_d_128 : 
+        GCCBuiltin<"__builtin_ia32_pabsd128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v4i32_ty, llvm_v4i32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_d_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsd256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v8i32_ty, llvm_v8i32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_d_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsd512_mask">,
+          Intrinsic<[llvm_v16i32_ty],
+          [llvm_v16i32_ty, llvm_v16i32_ty,  llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_q_128 : 
+        GCCBuiltin<"__builtin_ia32_pabsq128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+          [llvm_v2i64_ty, llvm_v2i64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_q_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsq256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+          [llvm_v4i64_ty, llvm_v4i64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_q_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsq512_mask">,
+          Intrinsic<[llvm_v8i64_ty],
+          [llvm_v8i64_ty, llvm_v8i64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_w_128 : 
+        GCCBuiltin<"__builtin_ia32_pabsw128_mask">,
+          Intrinsic<[llvm_v8i16_ty],
+          [llvm_v8i16_ty, llvm_v8i16_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_w_256 : 
+        GCCBuiltin<"__builtin_ia32_pabsw256_mask">,
+          Intrinsic<[llvm_v16i16_ty],
+          [llvm_v16i16_ty, llvm_v16i16_ty,  llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pabs_w_512 : 
+        GCCBuiltin<"__builtin_ia32_pabsw512_mask">,
+          Intrinsic<[llvm_v32i16_ty],
+          [llvm_v32i16_ty, llvm_v32i16_ty,  llvm_i32_ty],
+          [IntrNoMem]>;
 }
 
 // Horizontal arithmetic ops
@@ -2120,36 +2458,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4f64_ty],
                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
                         [IntrNoMem]>;
-  def int_x86_fma_mask_vfmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
-              Intrinsic<[llvm_v16f32_ty],
-                        [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i16_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
-              Intrinsic<[llvm_v8f64_ty],
-                        [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i8_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
+
   def int_x86_fma_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
               Intrinsic<[llvm_v4f32_ty],
                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@@ -2174,36 +2483,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4f64_ty],
                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
                         [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubps512_mask">,
-              Intrinsic<[llvm_v16f32_ty],
-                        [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i16_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256_mask">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubps128_mask">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask">,
-              Intrinsic<[llvm_v8f64_ty],
-                        [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i8_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
   def int_x86_fma_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
               Intrinsic<[llvm_v4f32_ty],
                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@@ -2228,36 +2507,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4f64_ty],
                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
                         [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
-              Intrinsic<[llvm_v16f32_ty],
-                        [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i16_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
-              Intrinsic<[llvm_v8f64_ty],
-                        [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i8_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
   def int_x86_fma_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
               Intrinsic<[llvm_v4f32_ty],
                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@@ -2282,36 +2531,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4f64_ty],
                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
                         [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
-              Intrinsic<[llvm_v16f32_ty],
-                        [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i16_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
-              Intrinsic<[llvm_v8f64_ty],
-                        [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i8_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfnmsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
   def int_x86_fma_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
               Intrinsic<[llvm_v4f32_ty],
                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@@ -2330,36 +2549,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4f64_ty],
                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
                         [IntrNoMem]>;
-  def int_x86_fma_mask_vfmaddsub_ps_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
-              Intrinsic<[llvm_v16f32_ty],
-                        [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i16_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmaddsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmaddsub_ps_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmaddsub_pd_512 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
-              Intrinsic<[llvm_v8f64_ty],
-                        [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i8_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmaddsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmaddsub_pd_128 : GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
   def int_x86_fma_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
               Intrinsic<[llvm_v4f32_ty],
                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
@@ -2378,36 +2567,403 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4f64_ty],
                         [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
                         [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsubadd_ps_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask">,
-              Intrinsic<[llvm_v16f32_ty],
-                        [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,
-                         llvm_i16_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsubadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask">,
-              Intrinsic<[llvm_v8f32_ty],
-                        [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsubadd_ps_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask">,
-              Intrinsic<[llvm_v4f32_ty],
-                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsubadd_pd_512 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask">,
-              Intrinsic<[llvm_v8f64_ty],
-                        [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,
-                         llvm_i8_ty, llvm_i32_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsubadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask">,
-              Intrinsic<[llvm_v4f64_ty],
-                        [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
-  def int_x86_fma_mask_vfmsubadd_pd_128 : GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask">,
-              Intrinsic<[llvm_v2f64_ty],
-                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_i8_ty],
-                        [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmadd_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmadd_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd128_mask3">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmadd_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd128_maskz">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmadd_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmadd_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd256_mask3">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmadd_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd256_maskz">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmadd_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmadd_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd512_mask3">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmadd_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddpd512_maskz">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmadd_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps128_mask">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmadd_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps128_mask3">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmadd_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps128_maskz">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmadd_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmadd_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps256_mask3">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmadd_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps256_maskz">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmadd_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps512_mask">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmadd_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps512_mask3">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmadd_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddps512_maskz">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmaddsub_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmaddsub_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_mask3">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmaddsub_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd128_maskz">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmaddsub_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmaddsub_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_mask3">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmaddsub_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd256_maskz">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmaddsub_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmaddsub_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_mask3">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmaddsub_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubpd512_maskz">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmaddsub_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmaddsub_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps128_mask3">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmaddsub_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps128_maskz">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmaddsub_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmaddsub_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps256_mask3">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmaddsub_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps256_maskz">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfmaddsub_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmaddsub_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps512_mask3">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_vfmaddsub_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmaddsubps512_maskz">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsub_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsub_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmsubpd256_mask3">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsub_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmsubpd512_mask3">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsub_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmsubps128_mask3">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsub_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmsubps256_mask3">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsub_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmsubps512_mask3">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsubadd_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfmsubaddpd128_mask3">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsubadd_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfmsubaddpd256_mask3">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsubadd_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfmsubaddpd512_mask3">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsubadd_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfmsubaddps128_mask3">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsubadd_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfmsubaddps256_mask3">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfmsubadd_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfmsubaddps512_mask3">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmadd_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfnmaddpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmadd_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfnmaddpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmadd_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfnmaddpd512_mask">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmadd_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfnmaddps128_mask">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmadd_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfnmaddps256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmadd_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfnmaddps512_mask">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmsub_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfnmsub_pd_128 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask3">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmsub_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfnmsub_pd_256 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubpd256_mask3">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmsub_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfnmsub_pd_512 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubpd512_mask3">,
+          Intrinsic<[llvm_v8f64_ty],
+          [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty,  llvm_i8_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmsub_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfnmsub_ps_128 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubps128_mask3">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmsub_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfnmsub_ps_256 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubps256_mask3">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty,  llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vfnmsub_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask3_vfnmsub_ps_512 :
+         GCCBuiltin<"__builtin_ia32_vfnmsubps512_mask3">,
+          Intrinsic<[llvm_v16f32_ty],
+          [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty,  llvm_i16_ty,
+          llvm_i32_ty], [IntrNoMem]>;
+
 }
 
 //===----------------------------------------------------------------------===//
@@ -2967,6 +3523,19 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 }
 
 //===----------------------------------------------------------------------===//
+// FXSR
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_fxrstor : GCCBuiltin<"__builtin_ia32_fxrstor">,
+              Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_x86_fxrstor64 : GCCBuiltin<"__builtin_ia32_fxrstor64">,
+              Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_x86_fxsave : GCCBuiltin<"__builtin_ia32_fxsave">,
+              Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_x86_fxsave64 : GCCBuiltin<"__builtin_ia32_fxsave64">,
+              Intrinsic<[], [llvm_ptr_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
 // Half float conversion
 
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
@@ -3501,6 +4070,26 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
                                       llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
                                      [IntrNoMem]>;
+
+  def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">,
+          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, 
+                    llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_scalef_pd_256 : GCCBuiltin<"__builtin_ia32_scalefpd256_mask">,
+          Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, 
+                    llvm_v4f64_ty, llvm_i8_ty],[IntrNoMem]>;
+  def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">,
+          Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, 
+                    llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">,
+          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, 
+                    llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_scalef_ps_256 : GCCBuiltin<"__builtin_ia32_scalefps256_mask">,
+          Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, 
+                    llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">,
+          Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, 
+                    llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
   def int_x86_avx512_sqrt_ss        : GCCBuiltin<"__builtin_ia32_sqrtrndss">,
               Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
                         [IntrNoMem]>;
@@ -3934,6 +4523,102 @@ let TargetPrefix = "x86" in {
                      llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
                     [IntrReadArgMem]>;
 
+  def int_x86_avx512_gather3div2_df : 
+        GCCBuiltin<"__builtin_ia32_gather3div2df">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3div2_di : 
+        GCCBuiltin<"__builtin_ia32_gather3div2di">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3div4_df : 
+        GCCBuiltin<"__builtin_ia32_gather3div4df">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3div4_di : 
+        GCCBuiltin<"__builtin_ia32_gather3div4di">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3div4_sf : 
+        GCCBuiltin<"__builtin_ia32_gather3div4sf">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3div4_si : 
+        GCCBuiltin<"__builtin_ia32_gather3div4si">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3div8_sf : 
+        GCCBuiltin<"__builtin_ia32_gather3div8sf">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3div8_si : 
+        GCCBuiltin<"__builtin_ia32_gather3div8si">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv2_df : 
+        GCCBuiltin<"__builtin_ia32_gather3siv2df">,
+          Intrinsic<[llvm_v2f64_ty],
+          [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv2_di : 
+        GCCBuiltin<"__builtin_ia32_gather3siv2di">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv4_df : 
+        GCCBuiltin<"__builtin_ia32_gather3siv4df">,
+          Intrinsic<[llvm_v4f64_ty],
+          [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv4_di : 
+        GCCBuiltin<"__builtin_ia32_gather3siv4di">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv4_sf : 
+        GCCBuiltin<"__builtin_ia32_gather3siv4sf">,
+          Intrinsic<[llvm_v4f32_ty],
+          [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv4_si : 
+        GCCBuiltin<"__builtin_ia32_gather3siv4si">,
+          Intrinsic<[llvm_v4i32_ty],
+          [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv8_sf : 
+        GCCBuiltin<"__builtin_ia32_gather3siv8sf">,
+          Intrinsic<[llvm_v8f32_ty],
+          [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
+  def int_x86_avx512_gather3siv8_si : 
+        GCCBuiltin<"__builtin_ia32_gather3siv8si">,
+          Intrinsic<[llvm_v8i32_ty],
+          [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
+          [IntrReadArgMem]>;
+
 // scatter
   def int_x86_avx512_scatter_dpd_512  : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
           Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
@@ -3970,6 +4655,102 @@ let TargetPrefix = "x86" in {
                          llvm_i32_ty],
                     [IntrReadWriteArgMem]>;
 
+  def int_x86_avx512_scatterdiv2_df : 
+       GCCBuiltin<"__builtin_ia32_scatterdiv2df">,
+        Intrinsic<[],
+        [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
+        [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scatterdiv2_di : 
+        GCCBuiltin<"__builtin_ia32_scatterdiv2di">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scatterdiv4_df : 
+        GCCBuiltin<"__builtin_ia32_scatterdiv4df">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scatterdiv4_di : 
+        GCCBuiltin<"__builtin_ia32_scatterdiv4di">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scatterdiv4_sf : 
+        GCCBuiltin<"__builtin_ia32_scatterdiv4sf">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scatterdiv4_si : 
+        GCCBuiltin<"__builtin_ia32_scatterdiv4si">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scatterdiv8_sf : 
+        GCCBuiltin<"__builtin_ia32_scatterdiv8sf">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scatterdiv8_si : 
+        GCCBuiltin<"__builtin_ia32_scatterdiv8si">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv2_df : 
+        GCCBuiltin<"__builtin_ia32_scattersiv2df">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv2_di : 
+        GCCBuiltin<"__builtin_ia32_scattersiv2di">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv4_df : 
+        GCCBuiltin<"__builtin_ia32_scattersiv4df">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv4_di : 
+        GCCBuiltin<"__builtin_ia32_scattersiv4di">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv4_sf : 
+        GCCBuiltin<"__builtin_ia32_scattersiv4sf">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv4_si : 
+        GCCBuiltin<"__builtin_ia32_scattersiv4si">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv8_sf : 
+        GCCBuiltin<"__builtin_ia32_scattersiv8sf">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
+  def int_x86_avx512_scattersiv8_si : 
+        GCCBuiltin<"__builtin_ia32_scattersiv8si">,
+          Intrinsic<[],
+          [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
+          [IntrReadWriteArgMem]>;
+
   // gather prefetch
   def int_x86_avx512_gatherpf_dpd_512  : GCCBuiltin<"__builtin_ia32_gatherpfdpd">,
           Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index 53c8b3a..e6c2209 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -209,6 +209,6 @@ inline LLVMContextRef *wrap(const LLVMContext **Tys) {
   return reinterpret_cast<LLVMContextRef*>(const_cast<LLVMContext**>(Tys));
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/LegacyPassManager.h b/include/llvm/IR/LegacyPassManager.h
index 7c678fb..5257a0e 100644
--- a/include/llvm/IR/LegacyPassManager.h
+++ b/include/llvm/IR/LegacyPassManager.h
@@ -93,11 +93,11 @@ private:
   Module *M;
 };
 
-} // namespace legacy
+} // End legacy namespace
 
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_STDCXX_CONVERSION_FUNCTIONS(legacy::PassManagerBase, LLVMPassManagerRef)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h
index e2f1ab4..7f7889a 100644
--- a/include/llvm/IR/LegacyPassManagers.h
+++ b/include/llvm/IR/LegacyPassManagers.h
@@ -474,6 +474,6 @@ public:
 
 Timer *getPassTimer(Pass *);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/LegacyPassNameParser.h b/include/llvm/IR/LegacyPassNameParser.h
index 3f98e76..39ae80d 100644
--- a/include/llvm/IR/LegacyPassNameParser.h
+++ b/include/llvm/IR/LegacyPassNameParser.h
@@ -134,6 +134,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Mangler.h b/include/llvm/IR/Mangler.h
index 6bda319..b72b259 100644
--- a/include/llvm/IR/Mangler.h
+++ b/include/llvm/IR/Mangler.h
@@ -25,28 +25,15 @@ template <typename T> class SmallVectorImpl;
 class Twine;
 
 class Mangler {
-public:
-  enum ManglerPrefixTy {
-    Default,               ///< Emit default string before each symbol.
-    Private,               ///< Emit "private" prefix before each symbol.
-    LinkerPrivate          ///< Emit "linker private" prefix before each symbol.
-  };
-
-private:
-  const DataLayout *DL;
-
-  /// AnonGlobalIDs - We need to give global values the same name every time
-  /// they are mangled.  This keeps track of the number we give to anonymous
-  /// ones.
-  ///
+  /// We need to give global values the same name every time they are mangled.
+  /// This keeps track of the number we give to anonymous ones.
   mutable DenseMap<const GlobalValue*, unsigned> AnonGlobalIDs;
 
-  /// NextAnonGlobalID - This simple counter is used to unique value names.
-  ///
+  /// This simple counter is used to unique value names.
   mutable unsigned NextAnonGlobalID;
 
 public:
-  Mangler(const DataLayout *DL) : DL(DL), NextAnonGlobalID(1) {}
+  Mangler() : NextAnonGlobalID(1) {}
 
   /// Print the appropriate prefix and the specified global variable's name.
   /// If the global variable doesn't have a name, this fills in a unique name
@@ -58,12 +45,12 @@ public:
 
   /// Print the appropriate prefix and the specified name as the global variable
   /// name. GVName must not be empty.
-  void getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
-                         ManglerPrefixTy PrefixTy = Mangler::Default) const;
-  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
-                         ManglerPrefixTy PrefixTy = Mangler::Default) const;
+  static void getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
+                                const DataLayout &DL);
+  static void getNameWithPrefix(SmallVectorImpl<char> &OutName,
+                                const Twine &GVName, const DataLayout &DL);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Metadata.def b/include/llvm/IR/Metadata.def
index f2abff4..857e4637d 100644
--- a/include/llvm/IR/Metadata.def
+++ b/include/llvm/IR/Metadata.def
@@ -82,6 +82,7 @@ HANDLE_SPECIALIZED_MDNODE_BRANCH(DILexicalBlockBase)
 HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlock)
 HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlockFile)
 HANDLE_SPECIALIZED_MDNODE_LEAF(DINamespace)
+HANDLE_SPECIALIZED_MDNODE_LEAF(DIModule)
 HANDLE_SPECIALIZED_MDNODE_BRANCH(DITemplateParameter)
 HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateTypeParameter)
 HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateValueParameter)
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index bf4a030..c639625 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -27,8 +27,11 @@
 #include <type_traits>
 
 namespace llvm {
+
 class LLVMContext;
 class Module;
+class ModuleSlotTracker;
+
 template<typename ValueSubClass, typename ItemParentClass>
   class SymbolTableListTraits;
 
@@ -73,6 +76,7 @@ public:
     DILexicalBlockKind,
     DILexicalBlockFileKind,
     DINamespaceKind,
+    DIModuleKind,
     DITemplateTypeParameterKind,
     DITemplateValueParameterKind,
     DIGlobalVariableKind,
@@ -121,7 +125,11 @@ public:
   ///
   /// If \c M is provided, metadata nodes will be numbered canonically;
   /// otherwise, pointer addresses are substituted.
+  /// @{
   void print(raw_ostream &OS, const Module *M = nullptr) const;
+  void print(raw_ostream &OS, ModuleSlotTracker &MST,
+             const Module *M = nullptr) const;
+  /// @}
 
   /// \brief Print as operand.
   ///
@@ -129,7 +137,11 @@ public:
   ///
   /// If \c M is provided, metadata nodes will be numbered canonically;
   /// otherwise, pointer addresses are substituted.
+  /// @{
   void printAsOperand(raw_ostream &OS, const Module *M = nullptr) const;
+  void printAsOperand(raw_ostream &OS, ModuleSlotTracker &MST,
+                      const Module *M = nullptr) const;
+  /// @}
 };
 
 #define HANDLE_METADATA(CLASS) class CLASS;
@@ -1203,6 +1215,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index 598a58e..1668b95 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -249,7 +249,7 @@ public:
 
   /// Get the data layout string for the module's target platform. This is
   /// equivalent to getDataLayout()->getStringRepresentation().
-  const std::string getDataLayoutStr() const {
+  const std::string &getDataLayoutStr() const {
     return DL.getStringRepresentation();
   }
 
@@ -694,6 +694,6 @@ inline Module *unwrap(LLVMModuleProviderRef MP) {
   return reinterpret_cast<Module*>(MP);
 }
   
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/ModuleSlotTracker.h b/include/llvm/IR/ModuleSlotTracker.h
new file mode 100644
index 0000000..c37dcec
--- /dev/null
+++ b/include/llvm/IR/ModuleSlotTracker.h
@@ -0,0 +1,68 @@
+//===-- llvm/IR/ModuleSlotTracker.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_MODULESLOTTRACKER_H
+#define LLVM_IR_MODULESLOTTRACKER_H
+
+#include <memory>
+
+namespace llvm {
+
+class Module;
+class Function;
+class SlotTracker;
+
+/// Manage lifetime of a slot tracker for printing IR.
+///
+/// Wrapper around the \a SlotTracker used internally by \a AsmWriter.  This
+/// class allows callers to share the cost of incorporating the metadata in a
+/// module or a function.
+///
+/// If the IR changes from underneath \a ModuleSlotTracker, strings like
+/// "<badref>" will be printed, or, worse, the wrong slots entirely.
+class ModuleSlotTracker {
+  /// Storage for a slot tracker.
+  std::unique_ptr<SlotTracker> MachineStorage;
+
+  const Module *M = nullptr;
+  const Function *F = nullptr;
+  SlotTracker *Machine = nullptr;
+
+public:
+  /// Wrap a preinitialized SlotTracker.
+  ModuleSlotTracker(SlotTracker &Machine, const Module *M,
+                    const Function *F = nullptr);
+
+  /// Construct a slot tracker from a module.
+  ///
+  /// If \a M is \c nullptr, uses a null slot tracker.  Otherwise, initializes
+  /// a slot tracker, and initializes all metadata slots.  \c
+  /// ShouldInitializeAllMetadata defaults to true because this is expected to
+  /// be shared between multiple callers, and otherwise MDNode references will
+  /// not match up.
+  explicit ModuleSlotTracker(const Module *M,
+                             bool ShouldInitializeAllMetadata = true);
+
+  /// Destructor to clean up storage.
+  ~ModuleSlotTracker();
+
+  SlotTracker *getMachine() const { return Machine; }
+  const Module *getModule() const { return M; }
+  const Function *getCurrentFunction() const { return F; }
+
+  /// Incorporate the given function.
+  ///
+  /// Purge the currently incorporated function and incorporate \c F.  If \c F
+  /// is currently incorporated, this is a no-op.
+  void incorporateFunction(const Function &F);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/IR/NoFolder.h b/include/llvm/IR/NoFolder.h
index 55b6798..61f4817 100644
--- a/include/llvm/IR/NoFolder.h
+++ b/include/llvm/IR/NoFolder.h
@@ -294,6 +294,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/OperandTraits.h b/include/llvm/IR/OperandTraits.h
index 91ec8d2..e97a800 100644
--- a/include/llvm/IR/OperandTraits.h
+++ b/include/llvm/IR/OperandTraits.h
@@ -155,6 +155,6 @@ template <int Idx_nocapture> const Use &CLASS::Op() const { \
 }
 
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
index 82f516e..1b9102e 100644
--- a/include/llvm/IR/Operator.h
+++ b/include/llvm/IR/Operator.h
@@ -491,6 +491,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h
index 2ff1a6f..4166bab 100644
--- a/include/llvm/IR/PassManager.h
+++ b/include/llvm/IR/PassManager.h
@@ -890,6 +890,6 @@ struct InvalidateAllAnalysesPass {
   static StringRef name() { return "InvalidateAllAnalysesPass"; }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h
index 7921b4f..92de10b 100644
--- a/include/llvm/IR/PassManagerInternal.h
+++ b/include/llvm/IR/PassManagerInternal.h
@@ -345,6 +345,6 @@ struct AnalysisPassModel<IRUnitT, PassT, false> : AnalysisPassConcept<IRUnitT> {
 };
 
 } // End namespace detail
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index 8159cde..4ab1f84 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -20,6 +20,7 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Compiler.h"
@@ -39,13 +40,13 @@ class GCRelocateOperands;
 class ImmutableStatepoint;
 
 bool isStatepoint(const ImmutableCallSite &CS);
-bool isStatepoint(const Value *inst);
-bool isStatepoint(const Value &inst);
+bool isStatepoint(const Value *V);
+bool isStatepoint(const Value &V);
 
-bool isGCRelocate(const Value *inst);
+bool isGCRelocate(const Value *V);
 bool isGCRelocate(const ImmutableCallSite &CS);
 
-bool isGCResult(const Value *inst);
+bool isGCResult(const Value *V);
 bool isGCResult(const ImmutableCallSite &CS);
 
 /// Analogous to CallSiteBase, this provides most of the actual
@@ -54,20 +55,23 @@ bool isGCResult(const ImmutableCallSite &CS);
 /// concrete subtypes.  This is structured analogous to CallSite
 /// rather than the IntrinsicInst.h helpers since we want to support
 /// invokable statepoints in the near future.
-/// TODO: This does not currently allow the if(Statepoint S = ...)
-///   idiom used with CallSites.  Consider refactoring to support.
-template <typename InstructionTy, typename ValueTy, typename CallSiteTy>
+template <typename FunTy, typename InstructionTy, typename ValueTy,
+          typename CallSiteTy>
 class StatepointBase {
   CallSiteTy StatepointCS;
   void *operator new(size_t, unsigned) = delete;
   void *operator new(size_t s) = delete;
 
 protected:
-  explicit StatepointBase(InstructionTy *I) : StatepointCS(I) {
-    assert(isStatepoint(I));
+  explicit StatepointBase(InstructionTy *I) {
+    if (isStatepoint(I)) {
+      StatepointCS = CallSiteTy(I);
+      assert(StatepointCS && "isStatepoint implies CallSite");
+    }
   }
-  explicit StatepointBase(CallSiteTy CS) : StatepointCS(CS) {
-    assert(isStatepoint(CS));
+  explicit StatepointBase(CallSiteTy CS) {
+    if (isStatepoint(CS))
+      StatepointCS = CS;
   }
 
 public:
@@ -76,29 +80,37 @@ public:
   enum {
     IDPos = 0,
     NumPatchBytesPos = 1,
-    ActualCalleePos = 2,
+    CalledFunctionPos = 2,
     NumCallArgsPos = 3,
     FlagsPos = 4,
     CallArgsBeginPos = 5,
   };
 
+  explicit operator bool() const {
+    // We do not assign non-statepoint CallSites to StatepointCS.
+    return (bool)StatepointCS;
+  }
+
   /// Return the underlying CallSite.
-  CallSiteTy getCallSite() { return StatepointCS; }
+  CallSiteTy getCallSite() const {
+    assert(*this && "check validity first!");
+    return StatepointCS;
+  }
 
   uint64_t getFlags() const {
-    return cast<ConstantInt>(StatepointCS.getArgument(FlagsPos))
+    return cast<ConstantInt>(getCallSite().getArgument(FlagsPos))
         ->getZExtValue();
   }
 
   /// Return the ID associated with this statepoint.
-  uint64_t getID() {
-    const Value *IDVal = StatepointCS.getArgument(IDPos);
+  uint64_t getID() const {
+    const Value *IDVal = getCallSite().getArgument(IDPos);
     return cast<ConstantInt>(IDVal)->getZExtValue();
   }
 
   /// Return the number of patchable bytes associated with this statepoint.
-  uint32_t getNumPatchBytes() {
-    const Value *NumPatchBytesVal = StatepointCS.getArgument(NumPatchBytesPos);
+  uint32_t getNumPatchBytes() const {
+    const Value *NumPatchBytesVal = getCallSite().getArgument(NumPatchBytesPos);
     uint64_t NumPatchBytes =
       cast<ConstantInt>(NumPatchBytesVal)->getZExtValue();
     assert(isInt<32>(NumPatchBytes) && "should fit in 32 bits!");
@@ -106,91 +118,125 @@ public:
   }
 
   /// Return the value actually being called or invoked.
-  ValueTy *getActualCallee() {
-    return StatepointCS.getArgument(ActualCalleePos);
+  ValueTy *getCalledValue() const {
+    return getCallSite().getArgument(CalledFunctionPos);
+  }
+
+  InstructionTy *getInstruction() const {
+    return getCallSite().getInstruction();
+  }
+
+  /// Return the function being called if this is a direct call, otherwise
+  /// return null (if it's an indirect call).
+  FunTy *getCalledFunction() const {
+    return dyn_cast<Function>(getCalledValue());
+  }
+
+  /// Return the caller function for this statepoint.
+  FunTy *getCaller() const { return getCallSite().getCaller(); }
+
+  /// Determine if the statepoint cannot unwind.
+  bool doesNotThrow() const {
+    Function *F = getCalledFunction();
+    return getCallSite().doesNotThrow() || (F ? F->doesNotThrow() : false);
   }
 
   /// Return the type of the value returned by the call underlying the
   /// statepoint.
-  Type *getActualReturnType() {
+  Type *getActualReturnType() const {
     auto *FTy = cast<FunctionType>(
-        cast<PointerType>(getActualCallee()->getType())->getElementType());
+        cast<PointerType>(getCalledValue()->getType())->getElementType());
     return FTy->getReturnType();
   }
 
   /// Number of arguments to be passed to the actual callee.
-  int getNumCallArgs() {
-    const Value *NumCallArgsVal = StatepointCS.getArgument(NumCallArgsPos);
+  int getNumCallArgs() const {
+    const Value *NumCallArgsVal = getCallSite().getArgument(NumCallArgsPos);
     return cast<ConstantInt>(NumCallArgsVal)->getZExtValue();
   }
 
-  typename CallSiteTy::arg_iterator call_args_begin() {
-    assert(CallArgsBeginPos <= (int)StatepointCS.arg_size());
-    return StatepointCS.arg_begin() + CallArgsBeginPos;
+  size_t arg_size() const { return getNumCallArgs(); }
+  typename CallSiteTy::arg_iterator arg_begin() const {
+    assert(CallArgsBeginPos <= (int)getCallSite().arg_size());
+    return getCallSite().arg_begin() + CallArgsBeginPos;
   }
-  typename CallSiteTy::arg_iterator call_args_end() {
-    auto I = call_args_begin() + getNumCallArgs();
-    assert((StatepointCS.arg_end() - I) >= 0);
+  typename CallSiteTy::arg_iterator arg_end() const {
+    auto I = arg_begin() + arg_size();
+    assert((getCallSite().arg_end() - I) >= 0);
     return I;
   }
 
+  ValueTy *getArgument(unsigned Index) {
+    assert(Index < arg_size() && "out of bounds!");
+    return *(arg_begin() + Index);
+  }
+
   /// range adapter for call arguments
-  iterator_range<arg_iterator> call_args() {
-    return iterator_range<arg_iterator>(call_args_begin(), call_args_end());
+  iterator_range<arg_iterator> call_args() const {
+    return iterator_range<arg_iterator>(arg_begin(), arg_end());
+  }
+
+  /// \brief Return true if the call or the callee has the given attribute.
+  bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
+    Function *F = getCalledFunction();
+    return getCallSite().paramHasAttr(i + CallArgsBeginPos, A) ||
+          (F ? F->getAttributes().hasAttribute(i, A) : false);
   }
 
   /// Number of GC transition args.
-  int getNumTotalGCTransitionArgs() {
-    const Value *NumGCTransitionArgs = *call_args_end();
+  int getNumTotalGCTransitionArgs() const {
+    const Value *NumGCTransitionArgs = *arg_end();
     return cast<ConstantInt>(NumGCTransitionArgs)->getZExtValue();
   }
-  typename CallSiteTy::arg_iterator gc_transition_args_begin() {
-    auto I = call_args_end() + 1;
-    assert((StatepointCS.arg_end() - I) >= 0);
+  typename CallSiteTy::arg_iterator gc_transition_args_begin() const {
+    auto I = arg_end() + 1;
+    assert((getCallSite().arg_end() - I) >= 0);
     return I;
   }
-  typename CallSiteTy::arg_iterator gc_transition_args_end() {
+  typename CallSiteTy::arg_iterator gc_transition_args_end() const {
     auto I = gc_transition_args_begin() + getNumTotalGCTransitionArgs();
-    assert((StatepointCS.arg_end() - I) >= 0);
+    assert((getCallSite().arg_end() - I) >= 0);
     return I;
   }
 
   /// range adapter for GC transition arguments
-  iterator_range<arg_iterator> gc_transition_args() {
+  iterator_range<arg_iterator> gc_transition_args() const {
     return iterator_range<arg_iterator>(gc_transition_args_begin(),
                                         gc_transition_args_end());
   }
 
   /// Number of additional arguments excluding those intended
   /// for garbage collection.
-  int getNumTotalVMSArgs() {
+  int getNumTotalVMSArgs() const {
     const Value *NumVMSArgs = *gc_transition_args_end();
     return cast<ConstantInt>(NumVMSArgs)->getZExtValue();
   }
 
-  typename CallSiteTy::arg_iterator vm_state_begin() {
+  typename CallSiteTy::arg_iterator vm_state_begin() const {
     auto I = gc_transition_args_end() + 1;
-    assert((StatepointCS.arg_end() - I) >= 0);
+    assert((getCallSite().arg_end() - I) >= 0);
     return I;
   }
-  typename CallSiteTy::arg_iterator vm_state_end() {
+  typename CallSiteTy::arg_iterator vm_state_end() const {
     auto I = vm_state_begin() + getNumTotalVMSArgs();
-    assert((StatepointCS.arg_end() - I) >= 0);
+    assert((getCallSite().arg_end() - I) >= 0);
     return I;
   }
 
   /// range adapter for vm state arguments
-  iterator_range<arg_iterator> vm_state_args() {
+  iterator_range<arg_iterator> vm_state_args() const {
     return iterator_range<arg_iterator>(vm_state_begin(), vm_state_end());
   }
 
-  typename CallSiteTy::arg_iterator gc_args_begin() { return vm_state_end(); }
-  typename CallSiteTy::arg_iterator gc_args_end() {
-    return StatepointCS.arg_end();
+  typename CallSiteTy::arg_iterator gc_args_begin() const {
+    return vm_state_end();
+  }
+  typename CallSiteTy::arg_iterator gc_args_end() const {
+    return getCallSite().arg_end();
   }
 
   /// range adapter for gc arguments
-  iterator_range<arg_iterator> gc_args() {
+  iterator_range<arg_iterator> gc_args() const {
     return iterator_range<arg_iterator>(gc_args_begin(), gc_args_end());
   }
 
@@ -198,7 +244,18 @@ public:
   /// May contain several relocations for the same base/derived pair.
   /// For example this could happen due to relocations on unwinding
   /// path of invoke.
-  std::vector<GCRelocateOperands> getRelocates();
+  std::vector<GCRelocateOperands> getRelocates() const;
+
+  /// Get the experimental_gc_result call tied to this statepoint.  Can be
+  /// nullptr if there isn't a gc_result tied to this statepoint.  Guaranteed to
+  /// be a CallInst if non-null.
+  InstructionTy *getGCResult() const {
+    for (auto *U : getInstruction()->users())
+      if (isGCResult(U))
+        return cast<CallInst>(U);
+
+    return nullptr;
+  }
 
 #ifndef NDEBUG
   /// Asserts if this statepoint is malformed.  Common cases for failure
@@ -209,8 +266,8 @@ public:
            "number of arguments to actually callee can't be negative");
 
     // The internal asserts in the iterator accessors do the rest.
-    (void)call_args_begin();
-    (void)call_args_end();
+    (void)arg_begin();
+    (void)arg_end();
     (void)gc_transition_args_begin();
     (void)gc_transition_args_end();
     (void)vm_state_begin();
@@ -224,9 +281,10 @@ public:
 /// A specialization of it's base class for read only access
 /// to a gc.statepoint.
 class ImmutableStatepoint
-    : public StatepointBase<const Instruction, const Value, ImmutableCallSite> {
-  typedef StatepointBase<const Instruction, const Value, ImmutableCallSite>
-      Base;
+    : public StatepointBase<const Function, const Instruction, const Value,
+                            ImmutableCallSite> {
+  typedef StatepointBase<const Function, const Instruction, const Value,
+                         ImmutableCallSite> Base;
 
 public:
   explicit ImmutableStatepoint(const Instruction *I) : Base(I) {}
@@ -235,8 +293,9 @@ public:
 
 /// A specialization of it's base class for read-write access
 /// to a gc.statepoint.
-class Statepoint : public StatepointBase<Instruction, Value, CallSite> {
-  typedef StatepointBase<Instruction, Value, CallSite> Base;
+class Statepoint
+    : public StatepointBase<Function, Instruction, Value, CallSite> {
+  typedef StatepointBase<Function, Instruction, Value, CallSite> Base;
 
 public:
   explicit Statepoint(Instruction *I) : Base(I) {}
@@ -313,9 +372,11 @@ public:
   }
 };
 
-template <typename InstructionTy, typename ValueTy, typename CallSiteTy>
+template <typename FunTy, typename InstructionTy, typename ValueTy,
+          typename CallSiteTy>
 std::vector<GCRelocateOperands>
-StatepointBase<InstructionTy, ValueTy, CallSiteTy>::getRelocates() {
+StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
+    const {
 
   std::vector<GCRelocateOperands> Result;
 
@@ -324,7 +385,7 @@ StatepointBase<InstructionTy, ValueTy, CallSiteTy>::getRelocates() {
   // Search for relocated pointers.  Note that working backwards from the
   // gc_relocates ensures that we only get pairs which are actually relocated
   // and used after the statepoint.
-  for (const User *U : StatepointCS.getInstruction()->users())
+  for (const User *U : getInstruction()->users())
     if (isGCRelocate(U))
       Result.push_back(GCRelocateOperands(U));
 
@@ -333,7 +394,7 @@ StatepointBase<InstructionTy, ValueTy, CallSiteTy>::getRelocates() {
 
   // We need to scan thorough exceptional relocations if it is invoke statepoint
   LandingPadInst *LandingPad =
-      cast<InvokeInst>(StatepointCS.getInstruction())->getLandingPadInst();
+      cast<InvokeInst>(getInstruction())->getLandingPadInst();
 
   // Search for extract value from landingpad instruction to which
   // gc relocates will be attached
@@ -348,6 +409,6 @@ StatepointBase<InstructionTy, ValueTy, CallSiteTy>::getRelocates() {
   }
   return Result;
 }
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h
index ef69498..0a5149c 100644
--- a/include/llvm/IR/SymbolTableListTraits.h
+++ b/include/llvm/IR/SymbolTableListTraits.h
@@ -73,6 +73,6 @@ public:
   static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index a626046..6ab0bd0 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -484,6 +484,6 @@ inline LLVMTypeRef *wrap(Type **Tys) {
   return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
 }
   
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h
index aa50d0e..73a63ad 100644
--- a/include/llvm/IR/TypeFinder.h
+++ b/include/llvm/IR/TypeFinder.h
@@ -74,6 +74,6 @@ private:
   void incorporateMDNode(const MDNode *V);
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
index 8f87df6..160d71b 100644
--- a/include/llvm/IR/Use.h
+++ b/include/llvm/IR/Use.h
@@ -168,6 +168,6 @@ template <> struct simplify_type<const Use> {
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use, LLVMUseRef)
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index 41d5770..93614fa 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -259,6 +259,6 @@ template<> struct simplify_type<User::const_op_iterator> {
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Value.def b/include/llvm/IR/Value.def
new file mode 100644
index 0000000..c2a0639
--- /dev/null
+++ b/include/llvm/IR/Value.def
@@ -0,0 +1,90 @@
+//===-------- llvm/IR/Value.def - File that describes Values ---v-*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the various LLVM values.  This is
+// used as a central place for enumerating the different values.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+// Provide definitions of macros so that users of this file do not have to
+// define everything to use it...
+//
+#if !(defined HANDLE_GLOBAL_VALUE || defined HANDLE_CONSTANT ||                \
+      defined HANDLE_INSTRUCTION || defined HANDLE_INLINE_ASM_VALUE ||         \
+      defined HANDLE_METADATA_VALUE || defined HANDLE_VALUE ||                 \
+      defined HANDLE_CONSTANT_MARKER)
+#error "Missing macro definition of HANDLE_VALUE*"
+#endif
+
+#ifndef HANDLE_GLOBAL_VALUE
+#define HANDLE_GLOBAL_VALUE(ValueName) HANDLE_CONSTANT(ValueName)
+#endif
+
+#ifndef HANDLE_CONSTANT
+#define HANDLE_CONSTANT(ValueName) HANDLE_VALUE(ValueName)
+#endif
+
+#ifndef HANDLE_INSTRUCTION
+#define HANDLE_INSTRUCTION(ValueName) HANDLE_VALUE(ValueName)
+#endif
+
+#ifndef HANDLE_INLINE_ASM_VALUE
+#define HANDLE_INLINE_ASM_VALUE(ValueName) HANDLE_VALUE(ValueName)
+#endif
+
+#ifndef HANDLE_METADATA_VALUE
+#define HANDLE_METADATA_VALUE(ValueName) HANDLE_VALUE(ValueName)
+#endif
+
+#ifndef HANDLE_VALUE
+#define HANDLE_VALUE(ValueName)
+#endif
+
+#ifndef HANDLE_CONSTANT_MARKER
+#define HANDLE_CONSTANT_MARKER(MarkerName, ValueName)
+#endif
+
+HANDLE_VALUE(Argument)
+HANDLE_VALUE(BasicBlock)
+
+HANDLE_GLOBAL_VALUE(Function)
+HANDLE_GLOBAL_VALUE(GlobalAlias)
+HANDLE_GLOBAL_VALUE(GlobalVariable)
+HANDLE_CONSTANT(UndefValue)
+HANDLE_CONSTANT(BlockAddress)
+HANDLE_CONSTANT(ConstantExpr)
+HANDLE_CONSTANT(ConstantAggregateZero)
+HANDLE_CONSTANT(ConstantDataArray)
+HANDLE_CONSTANT(ConstantDataVector)
+HANDLE_CONSTANT(ConstantInt)
+HANDLE_CONSTANT(ConstantFP)
+HANDLE_CONSTANT(ConstantArray)
+HANDLE_CONSTANT(ConstantStruct)
+HANDLE_CONSTANT(ConstantVector)
+HANDLE_CONSTANT(ConstantPointerNull)
+
+HANDLE_METADATA_VALUE(MetadataAsValue)
+HANDLE_INLINE_ASM_VALUE(InlineAsm)
+
+HANDLE_INSTRUCTION(Instruction)
+// Enum values starting at InstructionVal are used for Instructions;
+// don't add new values here!
+
+HANDLE_CONSTANT_MARKER(ConstantFirstVal, Function)
+HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantPointerNull)
+
+#undef HANDLE_GLOBAL_VALUE
+#undef HANDLE_CONSTANT
+#undef HANDLE_INSTRUCTION
+#undef HANDLE_METADATA_VALUE
+#undef HANDLE_INLINE_ASM_VALUE
+#undef HANDLE_VALUE
+#undef HANDLE_CONSTANT_MARKER
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index 6b36ba6..484afc6 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -38,6 +38,7 @@ class InlineAsm;
 class Instruction;
 class LLVMContext;
 class Module;
+class ModuleSlotTracker;
 class StringRef;
 class Twine;
 class Type;
@@ -199,7 +200,10 @@ public:
   void dump() const;
 
   /// \brief Implement operator<< on Value.
+  /// @{
   void print(raw_ostream &O) const;
+  void print(raw_ostream &O, ModuleSlotTracker &MST) const;
+  /// @}
 
   /// \brief Print the name of this Value out to the specified raw_ostream.
   ///
@@ -207,8 +211,12 @@ public:
   /// instruction that generated it. If you specify a Module for context, then
   /// even constanst get pretty-printed; for example, the type of a null
   /// pointer is printed symbolically.
+  /// @{
   void printAsOperand(raw_ostream &O, bool PrintType = true,
                       const Module *M = nullptr) const;
+  void printAsOperand(raw_ostream &O, bool PrintType,
+                      ModuleSlotTracker &MST) const;
+  /// @}
 
   /// \brief All values are typed, get the type of this value.
   Type *getType() const { return VTy; }
@@ -333,32 +341,12 @@ public:
   /// Value classes SubclassID field. They are used for concrete type
   /// identification.
   enum ValueTy {
-    ArgumentVal,              // This is an instance of Argument
-    BasicBlockVal,            // This is an instance of BasicBlock
-    FunctionVal,              // This is an instance of Function
-    GlobalAliasVal,           // This is an instance of GlobalAlias
-    GlobalVariableVal,        // This is an instance of GlobalVariable
-    UndefValueVal,            // This is an instance of UndefValue
-    BlockAddressVal,          // This is an instance of BlockAddress
-    ConstantExprVal,          // This is an instance of ConstantExpr
-    ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero
-    ConstantDataArrayVal,     // This is an instance of ConstantDataArray
-    ConstantDataVectorVal,    // This is an instance of ConstantDataVector
-    ConstantIntVal,           // This is an instance of ConstantInt
-    ConstantFPVal,            // This is an instance of ConstantFP
-    ConstantArrayVal,         // This is an instance of ConstantArray
-    ConstantStructVal,        // This is an instance of ConstantStruct
-    ConstantVectorVal,        // This is an instance of ConstantVector
-    ConstantPointerNullVal,   // This is an instance of ConstantPointerNull
-    MetadataAsValueVal,       // This is an instance of MetadataAsValue
-    InlineAsmVal,             // This is an instance of InlineAsm
-    InstructionVal,           // This is an instance of Instruction
-    // Enum values starting at InstructionVal are used for Instructions;
-    // don't add new values here!
+#define HANDLE_VALUE(Name) Name##Val,
+#include "llvm/IR/Value.def"
 
     // Markers:
-    ConstantFirstVal = FunctionVal,
-    ConstantLastVal  = ConstantPointerNullVal
+#define HANDLE_CONSTANT_MARKER(Marker, Constant) Marker = Constant##Val,
+#include "llvm/IR/Value.def"
   };
 
   /// \brief Return an ID for the concrete type of this object.
@@ -716,6 +704,6 @@ inline LLVMValueRef *wrap(const Value **Vals) {
   return reinterpret_cast<LLVMValueRef*>(const_cast<Value**>(Vals));
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h
index e92aed3..53fa80a 100644
--- a/include/llvm/IR/ValueHandle.h
+++ b/include/llvm/IR/ValueHandle.h
@@ -159,11 +159,13 @@ public:
 
 // Specialize simplify_type to allow WeakVH to participate in
 // dyn_cast, isa, etc.
-template<> struct simplify_type<WeakVH> {
-  typedef Value* SimpleType;
-  static SimpleType getSimplifiedValue(WeakVH &WVH) {
-    return WVH;
-  }
+template <> struct simplify_type<WeakVH> {
+  typedef Value *SimpleType;
+  static SimpleType getSimplifiedValue(WeakVH &WVH) { return WVH; }
+};
+template <> struct simplify_type<const WeakVH> {
+  typedef Value *SimpleType;
+  static SimpleType getSimplifiedValue(const WeakVH &WVH) { return WVH; }
 };
 
 /// \brief Value handle that asserts if the Value is deleted.
@@ -380,6 +382,6 @@ public:
   virtual void allUsesReplacedWith(Value *) {}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h
index 8219f50..bf1fade 100644
--- a/include/llvm/IR/ValueSymbolTable.h
+++ b/include/llvm/IR/ValueSymbolTable.h
@@ -128,6 +128,6 @@ private:
 /// @}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Verifier.h b/include/llvm/IR/Verifier.h
index 7da4d97..89039d2 100644
--- a/include/llvm/IR/Verifier.h
+++ b/include/llvm/IR/Verifier.h
@@ -72,6 +72,6 @@ public:
   static StringRef name() { return "VerifierPass"; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h
index bdaea6d..2d9ace0 100644
--- a/include/llvm/IRReader/IRReader.h
+++ b/include/llvm/IRReader/IRReader.h
@@ -43,6 +43,6 @@ std::unique_ptr<Module> parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
 /// for it.
 std::unique_ptr<Module> parseIRFile(StringRef Filename, SMDiagnostic &Err,
                                     LLVMContext &Context);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 33ffadb..74fbc0f 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -302,6 +302,6 @@ void initializePlaceSafepointsPass(PassRegistry&);
 void initializeDwarfEHPreparePass(PassRegistry&);
 void initializeFloat2IntPass(PassRegistry&);
 void initializeLoopDistributePass(PassRegistry&);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h
index c079f79..0c46fc0 100644
--- a/include/llvm/LTO/LTOCodeGenerator.h
+++ b/include/llvm/LTO/LTOCodeGenerator.h
@@ -177,5 +177,5 @@ private:
   bool ShouldInternalize = true;
   bool ShouldEmbedUselists = false;
 };
-} // namespace llvm
+}
 #endif
diff --git a/include/llvm/LTO/LTOModule.h b/include/llvm/LTO/LTOModule.h
index c2eb362..c4e2be6 100644
--- a/include/llvm/LTO/LTOModule.h
+++ b/include/llvm/LTO/LTOModule.h
@@ -47,12 +47,11 @@ private:
 
   std::unique_ptr<LLVMContext> OwnedContext;
 
+  std::string LinkerOpts;
+
   std::unique_ptr<object::IRObjectFile> IRFile;
   std::unique_ptr<TargetMachine> _target;
-  StringSet<>                             _linkeropt_strings;
-  std::vector<const char *>               _deplibs;
-  std::vector<const char *>               _linkeropts;
-  std::vector<NameAndAttributes>          _symbols;
+  std::vector<NameAndAttributes> _symbols;
 
   // _defines and _undefines only needed to disambiguate tentative definitions
   StringSet<>                             _defines;
@@ -149,28 +148,8 @@ public:
     return nullptr;
   }
 
-  /// Get the number of dependent libraries
-  uint32_t getDependentLibraryCount() {
-    return _deplibs.size();
-  }
-
-  /// Get the dependent library at the specified index.
-  const char *getDependentLibrary(uint32_t index) {
-    if (index < _deplibs.size())
-      return _deplibs[index];
-    return nullptr;
-  }
-
-  /// Get the number of linker options
-  uint32_t getLinkerOptCount() {
-    return _linkeropts.size();
-  }
-
-  /// Get the linker option at the specified index.
-  const char *getLinkerOpt(uint32_t index) {
-    if (index < _linkeropts.size())
-      return _linkeropts[index];
-    return nullptr;
+  const char *getLinkerOpts() {
+    return LinkerOpts.c_str();
   }
 
   const std::vector<const char*> &getAsmUndefinedRefs() {
@@ -224,5 +203,5 @@ private:
   static LTOModule *makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
                                   std::string &errMsg, LLVMContext *Context);
 };
-} // namespace llvm
+}
 #endif
diff --git a/include/llvm/LibDriver/LibDriver.h b/include/llvm/LibDriver/LibDriver.h
index 99c783c..aaaa7b7 100644
--- a/include/llvm/LibDriver/LibDriver.h
+++ b/include/llvm/LibDriver/LibDriver.h
@@ -15,9 +15,11 @@
 #ifndef LLVM_LIBDRIVER_LIBDRIVER_H
 #define LLVM_LIBDRIVER_LIBDRIVER_H
 
+#include "llvm/ADT/ArrayRef.h"
+
 namespace llvm {
 
-int libDriverMain(int argc, const char **argv);
+int libDriverMain(llvm::ArrayRef<const char*> ARgs);
 
 }
 
diff --git a/include/llvm/LineEditor/LineEditor.h b/include/llvm/LineEditor/LineEditor.h
index e644b19..bb106f8 100644
--- a/include/llvm/LineEditor/LineEditor.h
+++ b/include/llvm/LineEditor/LineEditor.h
@@ -148,6 +148,6 @@ private:
   std::unique_ptr<const CompleterConcept> Completer;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h
index de23acb..c43b90e 100644
--- a/include/llvm/Linker/Linker.h
+++ b/include/llvm/Linker/Linker.h
@@ -90,6 +90,6 @@ private:
   DiagnosticHandlerFunction DiagnosticHandler;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 07bba90..2bfad2d 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -138,6 +138,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index f72959a5..9bb0fa6 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -39,7 +39,7 @@ enum class EncodingType {
   X86,     /// Windows x86, uses no CFI, just EH tables
   MIPS = Alpha,
 };
-} // namespace WinEH
+}
 
 enum class ExceptionHandling {
   None,     /// No exception support
@@ -555,6 +555,6 @@ public:
 
   bool shouldUseLogicalShr() const { return UseLogicalShr; }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h
index 24f03e4..56444f3 100644
--- a/include/llvm/MC/MCAsmInfoCOFF.h
+++ b/include/llvm/MC/MCAsmInfoCOFF.h
@@ -30,7 +30,7 @@ namespace llvm {
   protected:
     explicit MCAsmInfoGNUCOFF();
   };
-} // namespace llvm
+}
 
 
 #endif // LLVM_MC_MCASMINFOCOFF_H
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
index b4445d1..b6c1915 100644
--- a/include/llvm/MC/MCCodeEmitter.h
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -41,6 +41,6 @@ public:
                                  const MCSubtargetInfo &STI) const = 0;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index 855013a..01f694d 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -132,6 +132,6 @@ public:
 MCObjectWriter *createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
                                       raw_pwrite_stream &OS,
                                       bool IsLittleEndian);
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCExternalSymbolizer.h b/include/llvm/MC/MCExternalSymbolizer.h
index a88b32e..2c7d237 100644
--- a/include/llvm/MC/MCExternalSymbolizer.h
+++ b/include/llvm/MC/MCExternalSymbolizer.h
@@ -53,6 +53,6 @@ public:
                                        uint64_t Address) override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
index 9fbdf9c..ad99943 100644
--- a/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -26,7 +26,7 @@ enum DecoderOps {
   OPC_Fail              // OPC_Fail()
 };
 
-} // namespace MCD
+} // namespace MCDecode
 } // namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h
index c09f55a..8ab477c 100644
--- a/include/llvm/MC/MCFixup.h
+++ b/include/llvm/MC/MCFixup.h
@@ -108,6 +108,6 @@ public:
   SMLoc getLoc() const { return Loc; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCFixupKindInfo.h b/include/llvm/MC/MCFixupKindInfo.h
index b779781..58183bd 100644
--- a/include/llvm/MC/MCFixupKindInfo.h
+++ b/include/llvm/MC/MCFixupKindInfo.h
@@ -38,6 +38,6 @@ struct MCFixupKindInfo {
   unsigned Flags;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index a0a6810..8f5159e 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -66,6 +66,6 @@ public:
                  uint64_t &Target) const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index fe67e44..3209a2c 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -49,7 +49,7 @@ enum OperandType {
   OPERAND_PCREL = 4,
   OPERAND_FIRST_TARGET = 5
 };
-} // namespace MCOI
+}
 
 /// \brief This holds information about one operand of a machine instruction,
 /// indicating the register class for register operands, etc.
@@ -128,7 +128,7 @@ enum Flag {
   InsertSubreg,
   Convergent
 };
-} // namespace MCID
+}
 
 /// \brief Describe properties that are true of each instruction in the target
 /// description file.  This captures information about side effects, register
diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h
index d75c4ca..70c8658 100644
--- a/include/llvm/MC/MCInstrInfo.h
+++ b/include/llvm/MC/MCInstrInfo.h
@@ -54,6 +54,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index a58bd7b..161705d 100644
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -234,6 +234,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index 10b7905..175d73e 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -264,6 +264,6 @@ MCObjectWriter *createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
                                        raw_pwrite_stream &OS,
                                        bool IsLittleEndian);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index ca7fba5..2211673 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -188,6 +188,6 @@ public:
   /// @}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 0bf8aa6..71f15b3 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -190,6 +190,6 @@ public:
   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index c840958..ac8706d 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -197,12 +197,23 @@ public:
   /// \brief Ensure that we have a valid section set in the streamer. Otherwise,
   /// report an error and switch to .text.
   virtual void checkForValidSection() = 0;
+
+  /// \brief Parse an arbitrary expression of a specified parenthesis depth,
+  /// assuming that the initial '(' characters have already been consumed.
+  ///
+  /// \param ParenDepth - Specifies how many trailing expressions outside the
+  /// current parentheses we have to parse.
+  /// \param Res - The value of the expression. The result is undefined
+  /// on error.
+  /// \return - False on success.
+  virtual bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
+                                     SMLoc &EndLoc) = 0;
 };
 
 /// \brief Create an MCAsmParser instance.
 MCAsmParser *createMCAsmParser(SourceMgr &, MCContext &, MCStreamer &,
                                const MCAsmInfo &);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 46f716e..077fd21 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -84,6 +84,6 @@ public:
   /// @}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCParser/MCAsmParserUtils.h b/include/llvm/MC/MCParser/MCAsmParserUtils.h
new file mode 100644
index 0000000..9834fe9
--- /dev/null
+++ b/include/llvm/MC/MCParser/MCAsmParserUtils.h
@@ -0,0 +1,33 @@
+//===------ llvm/MC/MCAsmParserUtils.h - Asm Parser Utilities ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCPARSER_MCASMPARSERUTILS_H
+#define LLVM_MC_MCPARSER_MCASMPARSERUTILS_H
+
+namespace llvm {
+
+class MCAsmParser;
+class MCExpr;
+class MCSymbol;
+class StringRef;
+
+namespace MCParserUtils {
+
+/// Parse a value expression and return whether it can be assigned to a symbol
+/// with the given name.
+///
+/// On success, returns false and sets the Symbol and Value output parameters.
+bool parseAssignmentExpression(StringRef Name, bool allow_redef,
+                               MCAsmParser &Parser, MCSymbol *&Symbol,
+                               const MCExpr *&Value);
+
+} // namespace MCParserUtils
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index 7a41abc..8e25ee1 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -686,6 +686,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCRelocationInfo.h b/include/llvm/MC/MCRelocationInfo.h
index 8fc5c9f..40e0217 100644
--- a/include/llvm/MC/MCRelocationInfo.h
+++ b/include/llvm/MC/MCRelocationInfo.h
@@ -50,6 +50,6 @@ public:
                                                      unsigned VariantKind);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 635eab9..1adfedd 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -245,6 +245,6 @@ struct MCSchedModel {
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 50d8d31..6b9b8a1 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -78,7 +78,7 @@ public:
   MCTargetStreamer(MCStreamer &S);
   virtual ~MCTargetStreamer();
 
-  const MCStreamer &getStreamer() { return Streamer; }
+  MCStreamer &getStreamer() { return Streamer; }
 
   // Allow a target to add behavior to the EmitLabel of MCStreamer.
   virtual void emitLabel(MCSymbol *Symbol);
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index 0a23306..b8ad02f 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -160,6 +160,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 0acf6e5..17e6b85 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_MC_MCSYMBOL_H
 #define LLVM_MC_MCSYMBOL_H
 
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/MC/MCAssembler.h"
@@ -46,6 +47,15 @@ protected:
     SymbolKindMachO,
   };
 
+  /// A symbol can contain an Offset, or Value, or be Common, but never more
+  /// than one of these.
+  enum Contents : uint8_t {
+    SymContentsUnset,
+    SymContentsOffset,
+    SymContentsVariable,
+    SymContentsCommon,
+  };
+
   // Special sentinal value for the absolute pseudo section.
   //
   // FIXME: Use a PointerInt wrapper for this?
@@ -62,10 +72,12 @@ protected:
   ///
   /// If this is a fragment, then it gives the fragment this symbol's value is
   /// relative to, if any.
-  mutable PointerUnion<MCSection *, MCFragment *> SectionOrFragment;
-
-  /// Value - If non-null, the value for a variable symbol.
-  const MCExpr *Value;
+  ///
+  /// For the 'HasName' integer, this is true if this symbol is named.
+  /// A named symbol will have a pointer to the name allocated in the bytes
+  /// immediately prior to the MCSymbol.
+  mutable PointerIntPair<PointerUnion<MCSection *, MCFragment *>, 1>
+      SectionOrFragmentAndHasName;
 
   /// IsTemporary - True if this is an assembler temporary label, which
   /// typically does not survive in the .o file's symbol table.  Usually
@@ -86,11 +98,6 @@ protected:
   /// This symbol is private extern.
   mutable unsigned IsPrivateExtern : 1;
 
-  /// True if this symbol is named.
-  /// A named symbol will have a pointer to the name allocated in the bytes
-  /// immediately prior to the MCSymbol.
-  unsigned HasName : 1;
-
   /// LLVM RTTI discriminator. This is actually a SymbolKind enumerator, but is
   /// unsigned to avoid sign extension and achieve better bitpacking with MSVC.
   unsigned Kind : 2;
@@ -98,6 +105,23 @@ protected:
   /// True if we have created a relocation that uses this symbol.
   mutable unsigned IsUsedInReloc : 1;
 
+  /// This is actually a Contents enumerator, but is unsigned to avoid sign
+  /// extension and achieve better bitpacking with MSVC.
+  unsigned SymbolContents : 2;
+
+  /// The alignment of the symbol, if it is 'common', or -1.
+  ///
+  /// The alignment is stored as log2(align) + 1.  This allows all values from
+  /// 0 to 2^31 to be stored which is every power of 2 representable by an
+  /// unsigned.
+  static const unsigned NumCommonAlignmentBits = 5;
+  unsigned CommonAlignLog2 : NumCommonAlignmentBits;
+
+  /// The Flags field is used by object file implementations to store
+  /// additional per symbol information which is not easily classified.
+  static const unsigned NumFlagsBits = 16;
+  mutable uint32_t Flags : NumFlagsBits;
+
   /// Index field, for use by the object file implementation.
   mutable uint32_t Index = 0;
 
@@ -107,16 +131,10 @@ protected:
 
     /// The size of the symbol, if it is 'common'.
     uint64_t CommonSize;
-  };
 
-  /// The alignment of the symbol, if it is 'common', or -1.
-  //
-  // FIXME: Pack this in with other fields?
-  unsigned CommonAlign = -1U;
-
-  /// The Flags field is used by object file implementations to store
-  /// additional per symbol information which is not easily classified.
-  mutable uint32_t Flags = 0;
+    /// If non-null, the value for a variable symbol.
+    const MCExpr *Value;
+  };
 
 protected: // MCContext creates and uniques these.
   friend class MCExpr;
@@ -132,11 +150,12 @@ protected: // MCContext creates and uniques these.
   } NameEntryStorageTy;
 
   MCSymbol(SymbolKind Kind, const StringMapEntry<bool> *Name, bool isTemporary)
-      : Value(nullptr), IsTemporary(isTemporary), IsRedefinable(false),
-        IsUsed(false), IsRegistered(false), IsExternal(false),
-        IsPrivateExtern(false), HasName(!!Name), Kind(Kind),
-        IsUsedInReloc(false) {
+      : IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false),
+        IsRegistered(false), IsExternal(false), IsPrivateExtern(false),
+        Kind(Kind), IsUsedInReloc(false), SymbolContents(SymContentsUnset),
+        CommonAlignLog2(0), Flags(0) {
     Offset = 0;
+    SectionOrFragmentAndHasName.setInt(!!Name);
     if (Name)
       getNameEntryPtr() = Name;
   }
@@ -163,16 +182,17 @@ private:
   MCSection *getSectionPtr() const {
     if (MCFragment *F = getFragment())
       return F->getParent();
+    const auto &SectionOrFragment = SectionOrFragmentAndHasName.getPointer();
     assert(!SectionOrFragment.is<MCFragment *>() && "Section or null expected");
     MCSection *Section = SectionOrFragment.dyn_cast<MCSection *>();
-    if (Section || !Value)
+    if (Section || !isVariable())
       return Section;
-    return Section = Value->findAssociatedSection();
+    return Section = getVariableValue()->findAssociatedSection();
   }
 
   /// \brief Get a reference to the name field.  Requires that we have a name
   const StringMapEntry<bool> *&getNameEntryPtr() {
-    assert(HasName && "Name is required");
+    assert(SectionOrFragmentAndHasName.getInt() && "Name is required");
     NameEntryStorageTy *Name = reinterpret_cast<NameEntryStorageTy *>(this);
     return (*(Name - 1)).NameEntry;
   }
@@ -183,7 +203,7 @@ private:
 public:
   /// getName - Get the symbol name.
   StringRef getName() const {
-    if (!HasName)
+    if (!SectionOrFragmentAndHasName.getInt())
       return StringRef();
 
     return getNameEntryPtr()->first();
@@ -212,8 +232,11 @@ public:
   /// \brief Prepare this symbol to be redefined.
   void redefineIfPossible() {
     if (IsRedefinable) {
-      Value = nullptr;
-      SectionOrFragment = nullptr;
+      if (SymbolContents == SymContentsVariable) {
+        Value = nullptr;
+        SymbolContents = SymContentsUnset;
+      }
+      setUndefined();
       IsRedefinable = false;
     }
   }
@@ -246,13 +269,15 @@ public:
   /// Mark the symbol as defined in the section \p S.
   void setSection(MCSection &S) {
     assert(!isVariable() && "Cannot set section of variable");
-    assert(!SectionOrFragment.is<MCFragment *>() && "Section or null expected");
-    SectionOrFragment = &S;
+    assert(!SectionOrFragmentAndHasName.getPointer().is<MCFragment *>() &&
+           "Section or null expected");
+    SectionOrFragmentAndHasName.setPointer(&S);
   }
 
   /// Mark the symbol as undefined.
   void setUndefined() {
-    SectionOrFragment = nullptr;
+    SectionOrFragmentAndHasName.setPointer(
+        PointerUnion<MCSection *, MCFragment *>());
   }
 
   bool isELF() const { return Kind == SymbolKindELF; }
@@ -266,7 +291,9 @@ public:
   /// @{
 
   /// isVariable - Check if this is a variable symbol.
-  bool isVariable() const { return Value != nullptr; }
+  bool isVariable() const {
+    return SymbolContents == SymContentsVariable;
+  }
 
   /// getVariableValue() - Get the value for variable symbols.
   const MCExpr *getVariableValue() const {
@@ -290,12 +317,17 @@ public:
   }
 
   uint64_t getOffset() const {
-    assert(!isCommon());
+    assert((SymbolContents == SymContentsUnset ||
+            SymbolContents == SymContentsOffset) &&
+           "Cannot get offset for a common/variable symbol");
     return Offset;
   }
   void setOffset(uint64_t Value) {
-    assert(!isCommon());
+    assert((SymbolContents == SymContentsUnset ||
+            SymbolContents == SymContentsOffset) &&
+           "Cannot set offset for a common/variable symbol");
     Offset = Value;
+    SymbolContents = SymContentsOffset;
   }
 
   /// Return the size of a 'common' symbol.
@@ -311,13 +343,20 @@ public:
   void setCommon(uint64_t Size, unsigned Align) {
     assert(getOffset() == 0);
     CommonSize = Size;
-    CommonAlign = Align;
+    SymbolContents = SymContentsCommon;
+
+    assert((!Align || isPowerOf2_32(Align)) &&
+           "Alignment must be a power of 2");
+    unsigned Log2Align = Log2_32(Align) + 1;
+    assert(Log2Align < (1U << NumCommonAlignmentBits) &&
+           "Out of range alignment");
+    CommonAlignLog2 = Log2Align;
   }
 
   ///  Return the alignment of a 'common' symbol.
   unsigned getCommonAlignment() const {
     assert(isCommon() && "Not a 'common' symbol!");
-    return CommonAlign;
+    return CommonAlignLog2 ? (1U << (CommonAlignLog2 - 1)) : 0;
   }
 
   /// Declare this symbol as being 'common'.
@@ -328,7 +367,7 @@ public:
   bool declareCommon(uint64_t Size, unsigned Align) {
     assert(isCommon() || getOffset() == 0);
     if(isCommon()) {
-      if(CommonSize != Size || CommonAlign != Align)
+      if(CommonSize != Size || getCommonAlignment() != Align)
        return true;
     } else
       setCommon(Size, Align);
@@ -336,13 +375,15 @@ public:
   }
 
   /// Is this a 'common' symbol.
-  bool isCommon() const { return CommonAlign != -1U; }
+  bool isCommon() const {
+    return SymbolContents == SymContentsCommon;
+  }
 
   MCFragment *getFragment() const {
-    return SectionOrFragment.dyn_cast<MCFragment *>();
+    return SectionOrFragmentAndHasName.getPointer().dyn_cast<MCFragment *>();
   }
   void setFragment(MCFragment *Value) const {
-    SectionOrFragment = Value;
+    SectionOrFragmentAndHasName.setPointer(Value);
   }
 
   bool isExternal() const { return IsExternal; }
@@ -362,10 +403,14 @@ protected:
   uint32_t getFlags() const { return Flags; }
 
   /// Set the (implementation defined) symbol flags.
-  void setFlags(uint32_t Value) const { Flags = Value; }
+  void setFlags(uint32_t Value) const {
+    assert(Value < (1U << NumFlagsBits) && "Out of range flags");
+    Flags = Value;
+  }
 
   /// Modify the flags via a mask
   void modifyFlags(uint32_t Value, uint32_t Mask) const {
+    assert(Value < (1U << NumFlagsBits) && "Out of range flags");
     Flags = (Flags & ~Mask) | Value;
   }
 };
diff --git a/include/llvm/MC/MCSymbolCOFF.h b/include/llvm/MC/MCSymbolCOFF.h
index 3b853f7..2172c67 100644
--- a/include/llvm/MC/MCSymbolCOFF.h
+++ b/include/llvm/MC/MCSymbolCOFF.h
@@ -59,6 +59,6 @@ public:
 
   static bool classof(const MCSymbol *S) { return S->isCOFF(); }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/MC/MCSymbolELF.h b/include/llvm/MC/MCSymbolELF.h
index b0ce3fe..bbcd22e 100644
--- a/include/llvm/MC/MCSymbolELF.h
+++ b/include/llvm/MC/MCSymbolELF.h
@@ -49,6 +49,6 @@ public:
 private:
   void setIsBindingSet() const;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/MC/MCSymbolMachO.h b/include/llvm/MC/MCSymbolMachO.h
index a162080..166ae9e 100644
--- a/include/llvm/MC/MCSymbolMachO.h
+++ b/include/llvm/MC/MCSymbolMachO.h
@@ -118,6 +118,6 @@ public:
 
   static bool classof(const MCSymbol *S) { return S->isMachO(); }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/MC/MCSymbolizer.h b/include/llvm/MC/MCSymbolizer.h
index 41c1b0d..2ef1767 100644
--- a/include/llvm/MC/MCSymbolizer.h
+++ b/include/llvm/MC/MCSymbolizer.h
@@ -80,6 +80,6 @@ public:
                                                uint64_t Address) = 0;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 4ee53ad..36db391 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -201,6 +201,6 @@ public:
   virtual void onLabelParsed(MCSymbol *Symbol) { };
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCWin64EH.h b/include/llvm/MC/MCWin64EH.h
index f2211d7..0e81a19 100644
--- a/include/llvm/MC/MCWin64EH.h
+++ b/include/llvm/MC/MCWin64EH.h
@@ -57,7 +57,7 @@ public:
   void Emit(MCStreamer &Streamer) const override;
   void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI) const override;
 };
-} // namespace Win64EH
+}
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index edf87f5..e2e95c7 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -42,6 +42,6 @@ class raw_pwrite_stream;
   /// \returns The constructed object writer.
   MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
                                             raw_pwrite_stream &OS);
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h
index fcca838..6fbc754 100644
--- a/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/include/llvm/MC/MCWinCOFFStreamer.h
@@ -75,7 +75,7 @@ protected:
 private:
   LLVM_ATTRIBUTE_NORETURN void FatalError(const Twine &Msg) const;
 };
-} // namespace llvm
+}
 
 #endif
 
diff --git a/include/llvm/MC/MCWinEH.h b/include/llvm/MC/MCWinEH.h
index d22791e..723d7a3 100644
--- a/include/llvm/MC/MCWinEH.h
+++ b/include/llvm/MC/MCWinEH.h
@@ -78,7 +78,7 @@ public:
   virtual void Emit(MCStreamer &Streamer) const = 0;
   virtual void EmitUnwindInfo(MCStreamer &Streamer, FrameInfo *FI) const = 0;
 };
-} // namespace WinEH
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h
index 1c42182..2a18615 100644
--- a/include/llvm/MC/MachineLocation.h
+++ b/include/llvm/MC/MachineLocation.h
@@ -78,6 +78,6 @@ inline bool operator!=(const MachineLocation &LHS, const MachineLocation &RHS) {
   return !(LHS == RHS);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h
index 700a8a6..897d449 100644
--- a/include/llvm/MC/StringTableBuilder.h
+++ b/include/llvm/MC/StringTableBuilder.h
@@ -62,6 +62,6 @@ private:
   }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/MC/YAML.h b/include/llvm/MC/YAML.h
index ae83298..383cdc6 100644
--- a/include/llvm/MC/YAML.h
+++ b/include/llvm/MC/YAML.h
@@ -89,6 +89,6 @@ template <> struct ScalarTraits<BinaryRef> {
   static StringRef input(StringRef, void *, BinaryRef &);
   static bool mustQuote(StringRef S) { return needsQuotes(S); }
 };
-} // namespace yaml
-} // namespace llvm
+}
+}
 #endif
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 3a52a9d..8da6919 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -217,7 +217,7 @@ private:
   unsigned IsThin : 1;
 };
 
-} // namespace object
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Object/ArchiveWriter.h b/include/llvm/Object/ArchiveWriter.h
index 8a394fa..1616e46 100644
--- a/include/llvm/Object/ArchiveWriter.h
+++ b/include/llvm/Object/ArchiveWriter.h
@@ -46,6 +46,6 @@ std::pair<StringRef, std::error_code>
 writeArchive(StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers,
              bool WriteSymtab);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index 949edf8..a3d6d0d 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -178,7 +178,7 @@ template <typename T> const T* OwningBinary<T>::getBinary() const {
 }
 
 ErrorOr<OwningBinary<Binary>> createBinary(StringRef Path);
-} // namespace object
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index ad657b5..fc60582 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -249,6 +249,15 @@ struct coff_symbol {
 typedef coff_symbol<support::ulittle16_t> coff_symbol16;
 typedef coff_symbol<support::ulittle32_t> coff_symbol32;
 
+// Contains only common parts of coff_symbol16 and coff_symbol32.
+struct coff_symbol_generic {
+  union {
+    char ShortName[COFF::NameSize];
+    StringTableOffset Offset;
+  } Name;
+  support::ulittle32_t Value;
+};
+
 class COFFSymbolRef {
 public:
   COFFSymbolRef(const coff_symbol16 *CS) : CS16(CS), CS32(nullptr) {}
@@ -259,6 +268,12 @@ public:
     return CS16 ? static_cast<const void *>(CS16) : CS32;
   }
 
+  const coff_symbol_generic *getGeneric() const {
+    if (CS16)
+      return reinterpret_cast<const coff_symbol_generic *>(CS16);
+    return reinterpret_cast<const coff_symbol_generic *>(CS32);
+  }
+
   friend bool operator<(COFFSymbolRef A, COFFSymbolRef B) {
     return A.getRawPtr() < B.getRawPtr();
   }
@@ -493,6 +508,29 @@ struct coff_load_configuration32 {
   support::ulittle32_t SEHandlerCount;
 };
 
+struct coff_load_configuration64 {
+  support::ulittle32_t Characteristics;
+  support::ulittle32_t TimeDateStamp;
+  support::ulittle16_t MajorVersion;
+  support::ulittle16_t MinorVersion;
+  support::ulittle32_t GlobalFlagsClear;
+  support::ulittle32_t GlobalFlagsSet;
+  support::ulittle32_t CriticalSectionDefaultTimeout;
+  support::ulittle32_t DeCommitFreeBlockThreshold;
+  support::ulittle32_t DeCommitTotalFreeThreshold;
+  support::ulittle32_t LockPrefixTable;
+  support::ulittle32_t MaximumAllocationSize;
+  support::ulittle32_t VirtualMemoryThreshold;
+  support::ulittle32_t ProcessAffinityMask;
+  support::ulittle32_t ProcessHeapFlags;
+  support::ulittle16_t CSDVersion;
+  support::ulittle16_t Reserved;
+  support::ulittle32_t EditList;
+  support::ulittle64_t SecurityCookie;
+  support::ulittle64_t SEHandlerTable;
+  support::ulittle64_t SEHandlerCount;
+};
+
 struct coff_runtime_function_x64 {
   support::ulittle32_t BeginAddress;
   support::ulittle32_t EndAddress;
@@ -609,14 +647,13 @@ public:
   }
 protected:
   void moveSymbolNext(DataRefImpl &Symb) const override;
-  std::error_code getSymbolName(DataRefImpl Symb,
-                                StringRef &Res) const override;
+  ErrorOr<StringRef> getSymbolName(DataRefImpl Symb) const override;
   std::error_code getSymbolAddress(DataRefImpl Symb,
                                    uint64_t &Res) const override;
-  uint64_t getSymbolSize(DataRefImpl Symb) const override;
+  uint64_t getSymbolValue(DataRefImpl Symb) const override;
+  uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
   uint32_t getSymbolFlags(DataRefImpl Symb) const override;
-  std::error_code getSymbolType(DataRefImpl Symb,
-                                SymbolRef::Type &Res) const override;
+  SymbolRef::Type getSymbolType(DataRefImpl Symb) const override;
   std::error_code getSymbolSection(DataRefImpl Symb,
                                    section_iterator &Res) const override;
   void moveSectionNext(DataRefImpl &Sec) const override;
@@ -631,21 +668,17 @@ protected:
   bool isSectionData(DataRefImpl Sec) const override;
   bool isSectionBSS(DataRefImpl Sec) const override;
   bool isSectionVirtual(DataRefImpl Sec) const override;
-  bool sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb) const override;
   relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
   relocation_iterator section_rel_end(DataRefImpl Sec) const override;
 
   void moveRelocationNext(DataRefImpl &Rel) const override;
-  std::error_code getRelocationAddress(DataRefImpl Rel,
-                                       uint64_t &Res) const override;
-  std::error_code getRelocationOffset(DataRefImpl Rel,
-                                      uint64_t &Res) const override;
+  ErrorOr<uint64_t> getRelocationAddress(DataRefImpl Rel) const override;
+  uint64_t getRelocationOffset(DataRefImpl Rel) const override;
   symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
-  std::error_code getRelocationType(DataRefImpl Rel,
-                                    uint64_t &Res) const override;
-  std::error_code
-  getRelocationTypeName(DataRefImpl Rel,
-                        SmallVectorImpl<char> &Result) const override;
+  uint64_t getRelocationType(DataRefImpl Rel) const override;
+  void getRelocationTypeName(DataRefImpl Rel,
+                             SmallVectorImpl<char> &Result) const override;
+
 public:
   COFFObjectFile(MemoryBufferRef Object, std::error_code &EC);
   basic_symbol_iterator symbol_begin_impl() const override;
@@ -657,6 +690,8 @@ public:
   COFFSymbolRef getCOFFSymbol(const DataRefImpl &Ref) const;
   COFFSymbolRef getCOFFSymbol(const SymbolRef &Symbol) const;
   const coff_relocation *getCOFFRelocation(const RelocationRef &Reloc) const;
+  unsigned getSectionID(SectionRef Sec) const;
+  unsigned getSymbolSectionID(SymbolRef Sym) const;
 
   uint8_t getBytesInAddress() const override;
   StringRef getFileFormatName() const override;
@@ -720,6 +755,8 @@ public:
     return std::error_code();
   }
   std::error_code getSymbolName(COFFSymbolRef Symbol, StringRef &Res) const;
+  std::error_code getSymbolName(const coff_symbol_generic *Symbol,
+                                StringRef &Res) const;
 
   ArrayRef<uint8_t> getSymbolAuxData(COFFSymbolRef Symbol) const;
 
@@ -731,6 +768,9 @@ public:
     llvm_unreachable("null symbol table pointer!");
   }
 
+  iterator_range<const coff_relocation *>
+  getRelocations(const coff_section *Sec) const;
+
   std::error_code getSectionName(const coff_section *Sec, StringRef &Res) const;
   uint64_t getSectionSize(const coff_section *Sec) const;
   std::error_code getSectionContents(const coff_section *Sec,
diff --git a/include/llvm/Object/COFFYAML.h b/include/llvm/Object/COFFYAML.h
index 5ba3db3..12a2522 100644
--- a/include/llvm/Object/COFFYAML.h
+++ b/include/llvm/Object/COFFYAML.h
@@ -37,7 +37,7 @@ inline DLLCharacteristics operator|(DLLCharacteristics a,
   uint16_t Ret = static_cast<uint16_t>(a) | static_cast<uint16_t>(b);
   return static_cast<DLLCharacteristics>(Ret);
 }
-} // namespace COFF
+}
 
 // The structure of the yaml files is not an exact 1:1 match to COFF. In order
 // to use yaml::IO, we use these structures which are closer to the source.
@@ -87,8 +87,8 @@ namespace COFFYAML {
     std::vector<Symbol> Symbols;
     Object();
   };
-} // namespace COFFYAML
-} // namespace llvm
+}
+}
 
 LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section)
 LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol)
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index e87737d..3b0c548 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -143,8 +143,7 @@ public:
   typedef iterator_range<Elf_Dyn_Iter> Elf_Dyn_Range;
   typedef ELFEntityIterator<const Elf_Rela> Elf_Rela_Iter;
   typedef ELFEntityIterator<const Elf_Rel> Elf_Rel_Iter;
-  typedef ELFEntityIterator<const Elf_Shdr> Elf_Shdr_Iter;
-  typedef iterator_range<Elf_Shdr_Iter> Elf_Shdr_Range;
+  typedef iterator_range<const Elf_Shdr *> Elf_Shdr_Range;
 
   /// \brief Archive files are 2 byte aligned, so we need this for
   ///     PointerIntPair to work.
@@ -158,74 +157,7 @@ public:
     enum { NumLowBitsAvailable = 1 };
   };
 
-  class Elf_Sym_Iter {
-  public:
-    typedef ptrdiff_t difference_type;
-    typedef const Elf_Sym value_type;
-    typedef std::random_access_iterator_tag iterator_category;
-    typedef value_type &reference;
-    typedef value_type *pointer;
-
-    /// \brief Default construct iterator.
-    Elf_Sym_Iter() : EntitySize(0), Current(0, false) {}
-    Elf_Sym_Iter(uintX_t EntSize, const char *Start, bool IsDynamic)
-        : EntitySize(EntSize), Current(Start, IsDynamic) {}
-
-    reference operator*() {
-      assert(Current.getPointer() &&
-             "Attempted to dereference an invalid iterator!");
-      return *reinterpret_cast<pointer>(Current.getPointer());
-    }
-
-    pointer operator->() {
-      assert(Current.getPointer() &&
-             "Attempted to dereference an invalid iterator!");
-      return reinterpret_cast<pointer>(Current.getPointer());
-    }
-
-    bool operator==(const Elf_Sym_Iter &Other) {
-      return Current == Other.Current;
-    }
-
-    bool operator!=(const Elf_Sym_Iter &Other) { return !(*this == Other); }
-
-    Elf_Sym_Iter &operator++() {
-      assert(Current.getPointer() &&
-             "Attempted to increment an invalid iterator!");
-      Current.setPointer(Current.getPointer() + EntitySize);
-      return *this;
-    }
-
-    Elf_Sym_Iter operator++(int) {
-      Elf_Sym_Iter Tmp = *this;
-      ++*this;
-      return Tmp;
-    }
-
-    Elf_Sym_Iter operator+(difference_type Dist) {
-      assert(Current.getPointer() &&
-             "Attempted to increment an invalid iterator!");
-      Current.setPointer(Current.getPointer() + EntitySize * Dist);
-      return *this;
-    }
-
-    difference_type operator-(const Elf_Sym_Iter &Other) const {
-      assert(EntitySize == Other.EntitySize &&
-             "Subtracting iterators of different EntitySize!");
-      return (Current.getPointer() - Other.Current.getPointer()) / EntitySize;
-    }
-
-    const char *get() const { return Current.getPointer(); }
-
-    bool isDynamic() const { return Current.getInt(); }
-
-    uintX_t getEntSize() const { return EntitySize; }
-
-  private:
-    uintX_t EntitySize;
-    PointerIntPair<const char *, 1, bool,
-                   ArchivePointerTypeTraits<const char> > Current;
-  };
+  typedef iterator_range<const Elf_Sym *> Elf_Sym_Range;
 
 private:
   typedef SmallVector<const Elf_Shdr *, 2> Sections_t;
@@ -238,17 +170,19 @@ private:
   }
 
   const Elf_Ehdr *Header;
-  const Elf_Shdr *SectionHeaderTable;
-  const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
-  const Elf_Shdr *dot_strtab_sec;   // Symbol header string table.
-  const Elf_Shdr *dot_symtab_sec;   // Symbol table section.
-
-  const Elf_Shdr *SymbolTableSectionHeaderIndex;
+  const Elf_Shdr *SectionHeaderTable = nullptr;
+  StringRef DotShstrtab;                    // Section header string table.
+  StringRef DotStrtab;                      // Symbol header string table.
+  const Elf_Shdr *dot_symtab_sec = nullptr; // Symbol table section.
+  StringRef DynSymStrTab;                   // Dynnamic symbol string table.
+  const Elf_Shdr *DotDynSymSec = nullptr;   // Dynamic symbol table section.
+
+  const Elf_Shdr *SymbolTableSectionHeaderIndex = nullptr;
   DenseMap<const Elf_Sym *, ELF::Elf64_Word> ExtendedSymbolTable;
 
-  const Elf_Shdr *dot_gnu_version_sec;   // .gnu.version
-  const Elf_Shdr *dot_gnu_version_r_sec; // .gnu.version_r
-  const Elf_Shdr *dot_gnu_version_d_sec; // .gnu.version_d
+  const Elf_Shdr *dot_gnu_version_sec = nullptr;   // .gnu.version
+  const Elf_Shdr *dot_gnu_version_r_sec = nullptr; // .gnu.version_r
+  const Elf_Shdr *dot_gnu_version_d_sec = nullptr; // .gnu.version_d
 
   /// \brief Represents a region described by entries in the .dynamic table.
   struct DynRegionInfo {
@@ -263,12 +197,11 @@ private:
 
   DynRegionInfo DynamicRegion;
   DynRegionInfo DynHashRegion;
-  DynRegionInfo DynStrRegion;
-  DynRegionInfo DynSymRegion;
+  DynRegionInfo DynRelaRegion;
 
   // Pointer to SONAME entry in dynamic string table
   // This is set the first time getLoadName is called.
-  mutable const char *dt_soname;
+  mutable const char *dt_soname = nullptr;
 
   // Records for each version index the corresponding Verdef or Vernaux entry.
   // This is filled the first time LoadVersionMap() is called.
@@ -301,8 +234,11 @@ public:
   const T        *getEntry(uint32_t Section, uint32_t Entry) const;
   template <typename T>
   const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
-  const char     *getString(uint32_t section, uint32_t offset) const;
-  const char     *getString(const Elf_Shdr *section, uint32_t offset) const;
+
+  const Elf_Shdr *getDotSymtabSec() const { return dot_symtab_sec; }
+  const Elf_Shdr *getDotDynSymSec() const { return DotDynSymSec; }
+
+  ErrorOr<StringRef> getStringTable(const Elf_Shdr *Section) const;
   const char *getDynamicString(uintX_t Offset) const;
   ErrorOr<StringRef> getSymbolVersion(const Elf_Shdr *section,
                                       const Elf_Sym *Symb,
@@ -331,55 +267,77 @@ public:
       Header->getDataEncoding() == ELF::ELFDATA2LSB;
   }
 
-  Elf_Shdr_Iter begin_sections() const;
-  Elf_Shdr_Iter end_sections() const;
+  const Elf_Shdr *section_begin() const;
+  const Elf_Shdr *section_end() const;
   Elf_Shdr_Range sections() const {
-    return make_range(begin_sections(), end_sections());
+    return make_range(section_begin(), section_end());
   }
 
-  Elf_Sym_Iter begin_symbols() const;
-  Elf_Sym_Iter end_symbols() const;
+  const Elf_Sym *symbol_begin() const;
+  const Elf_Sym *symbol_end() const;
+  Elf_Sym_Range symbols() const {
+    return make_range(symbol_begin(), symbol_end());
+  }
 
-  Elf_Dyn_Iter begin_dynamic_table() const;
+  Elf_Dyn_Iter dynamic_table_begin() const;
   /// \param NULLEnd use one past the first DT_NULL entry as the end instead of
   /// the section size.
-  Elf_Dyn_Iter end_dynamic_table(bool NULLEnd = false) const;
+  Elf_Dyn_Iter dynamic_table_end(bool NULLEnd = false) const;
   Elf_Dyn_Range dynamic_table(bool NULLEnd = false) const {
-    return make_range(begin_dynamic_table(), end_dynamic_table(NULLEnd));
+    return make_range(dynamic_table_begin(), dynamic_table_end(NULLEnd));
+  }
+
+  const Elf_Sym *dynamic_symbol_begin() const {
+    if (!DotDynSymSec)
+      return nullptr;
+    if (DotDynSymSec->sh_entsize != sizeof(Elf_Sym))
+      report_fatal_error("Invalid symbol size");
+    return reinterpret_cast<const Elf_Sym *>(base() + DotDynSymSec->sh_offset);
+  }
+
+  const Elf_Sym *dynamic_symbol_end() const {
+    if (!DotDynSymSec)
+      return nullptr;
+    return reinterpret_cast<const Elf_Sym *>(base() + DotDynSymSec->sh_offset +
+                                             DotDynSymSec->sh_size);
   }
 
-  Elf_Sym_Iter begin_dynamic_symbols() const {
-    if (DynSymRegion.Addr)
-      return Elf_Sym_Iter(DynSymRegion.EntSize, (const char *)DynSymRegion.Addr,
-                          true);
-    return Elf_Sym_Iter(0, nullptr, true);
+  Elf_Sym_Range dynamic_symbols() const {
+    return make_range(dynamic_symbol_begin(), dynamic_symbol_end());
   }
 
-  Elf_Sym_Iter end_dynamic_symbols() const {
-    if (DynSymRegion.Addr)
-      return Elf_Sym_Iter(DynSymRegion.EntSize,
-                          (const char *)DynSymRegion.Addr + DynSymRegion.Size,
-                          true);
-    return Elf_Sym_Iter(0, nullptr, true);
+  Elf_Rela_Iter dyn_rela_begin() const {
+    if (DynRelaRegion.Addr)
+      return Elf_Rela_Iter(DynRelaRegion.EntSize,
+        (const char *)DynRelaRegion.Addr);
+    return Elf_Rela_Iter(0, nullptr);
   }
 
-  Elf_Rela_Iter begin_rela(const Elf_Shdr *sec) const {
+  Elf_Rela_Iter dyn_rela_end() const {
+    if (DynRelaRegion.Addr)
+      return Elf_Rela_Iter(
+        DynRelaRegion.EntSize,
+        (const char *)DynRelaRegion.Addr + DynRelaRegion.Size);
+    return Elf_Rela_Iter(0, nullptr);
+  }
+
+  Elf_Rela_Iter rela_begin(const Elf_Shdr *sec) const {
     return Elf_Rela_Iter(sec->sh_entsize,
                          (const char *)(base() + sec->sh_offset));
   }
 
-  Elf_Rela_Iter end_rela(const Elf_Shdr *sec) const {
+  Elf_Rela_Iter rela_end(const Elf_Shdr *sec) const {
     return Elf_Rela_Iter(
         sec->sh_entsize,
         (const char *)(base() + sec->sh_offset + sec->sh_size));
   }
 
-  Elf_Rel_Iter begin_rel(const Elf_Shdr *sec) const {
+  Elf_Rel_Iter rel_begin(const Elf_Shdr *sec) const {
     return Elf_Rel_Iter(sec->sh_entsize,
                         (const char *)(base() + sec->sh_offset));
   }
 
-  Elf_Rel_Iter end_rel(const Elf_Shdr *sec) const {
+  Elf_Rel_Iter rel_end(const Elf_Shdr *sec) const {
     return Elf_Rel_Iter(sec->sh_entsize,
                         (const char *)(base() + sec->sh_offset + sec->sh_size));
   }
@@ -387,12 +345,12 @@ public:
   /// \brief Iterate over program header table.
   typedef ELFEntityIterator<const Elf_Phdr> Elf_Phdr_Iter;
 
-  Elf_Phdr_Iter begin_program_headers() const {
+  Elf_Phdr_Iter program_header_begin() const {
     return Elf_Phdr_Iter(Header->e_phentsize,
                          (const char*)base() + Header->e_phoff);
   }
 
-  Elf_Phdr_Iter end_program_headers() const {
+  Elf_Phdr_Iter program_header_end() const {
     return Elf_Phdr_Iter(Header->e_phentsize,
                          (const char*)base() +
                            Header->e_phoff +
@@ -401,24 +359,17 @@ public:
 
   uint64_t getNumSections() const;
   uintX_t getStringTableIndex() const;
-  ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const;
+  ELF::Elf64_Word getExtendedSymbolTableIndex(const Elf_Sym *symb) const;
   const Elf_Ehdr *getHeader() const { return Header; }
-  const Elf_Shdr *getSection(const Elf_Sym *symb) const;
-  const Elf_Shdr *getSection(uint32_t Index) const;
+  ErrorOr<const Elf_Shdr *> getSection(const Elf_Sym *symb) const;
+  ErrorOr<const Elf_Shdr *> getSection(uint32_t Index) const;
   const Elf_Sym *getSymbol(uint32_t index) const;
 
-  ErrorOr<StringRef> getSymbolName(Elf_Sym_Iter Sym) const;
+  ErrorOr<StringRef> getStaticSymbolName(const Elf_Sym *Symb) const;
+  ErrorOr<StringRef> getDynamicSymbolName(const Elf_Sym *Symb) const;
+  ErrorOr<StringRef> getSymbolName(const Elf_Sym *Symb, bool IsDynamic) const;
 
-  /// \brief Get the name of \p Symb.
-  /// \param SymTab The symbol table section \p Symb is contained in.
-  /// \param Symb The symbol to get the name of.
-  ///
-  /// \p SymTab is used to lookup the string table to use to get the symbol's
-  /// name.
-  ErrorOr<StringRef> getSymbolName(const Elf_Shdr *SymTab,
-                                   const Elf_Sym *Symb) const;
   ErrorOr<StringRef> getSectionName(const Elf_Shdr *Section) const;
-  uint64_t getSymbolIndex(const Elf_Sym *sym) const;
   ErrorOr<ArrayRef<uint8_t> > getSectionContents(const Elf_Shdr *Sec) const;
   StringRef getLoadName() const;
 };
@@ -490,7 +441,7 @@ void ELFFile<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
 template <class ELFT>
 void ELFFile<ELFT>::LoadVersionMap() const {
   // If there is no dynamic symtab or version table, there is nothing to do.
-  if (!DynSymRegion.Addr || !dot_gnu_version_sec)
+  if (!DotDynSymSec || !dot_gnu_version_sec)
     return;
 
   // Has the VersionMap already been loaded?
@@ -510,18 +461,19 @@ void ELFFile<ELFT>::LoadVersionMap() const {
 }
 
 template <class ELFT>
-ELF::Elf64_Word ELFFile<ELFT>::getSymbolTableIndex(const Elf_Sym *symb) const {
-  if (symb->st_shndx == ELF::SHN_XINDEX)
-    return ExtendedSymbolTable.lookup(symb);
-  return symb->st_shndx;
+ELF::Elf64_Word
+ELFFile<ELFT>::getExtendedSymbolTableIndex(const Elf_Sym *symb) const {
+  assert(symb->st_shndx == ELF::SHN_XINDEX);
+  return ExtendedSymbolTable.lookup(symb);
 }
 
 template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Shdr *
+ErrorOr<const typename ELFFile<ELFT>::Elf_Shdr *>
 ELFFile<ELFT>::getSection(const Elf_Sym *symb) const {
-  if (symb->st_shndx == ELF::SHN_XINDEX)
+  uint32_t Index = symb->st_shndx;
+  if (Index == ELF::SHN_XINDEX)
     return getSection(ExtendedSymbolTable.lookup(symb));
-  if (symb->st_shndx >= ELF::SHN_LORESERVE)
+  if (Index == ELF::SHN_UNDEF || Index >= ELF::SHN_LORESERVE)
     return nullptr;
   return getSection(symb->st_shndx);
 }
@@ -529,7 +481,7 @@ ELFFile<ELFT>::getSection(const Elf_Sym *symb) const {
 template <class ELFT>
 const typename ELFFile<ELFT>::Elf_Sym *
 ELFFile<ELFT>::getSymbol(uint32_t Index) const {
-  return &*(begin_symbols() + Index);
+  return &*(symbol_begin() + Index);
 }
 
 template <class ELFT>
@@ -584,20 +536,14 @@ std::pair<const typename ELFFile<ELFT>::Elf_Shdr *,
 ELFFile<ELFT>::getRelocationSymbol(const Elf_Shdr *Sec, const RelT *Rel) const {
   if (!Sec->sh_link)
     return std::make_pair(nullptr, nullptr);
-  const Elf_Shdr *SymTable = getSection(Sec->sh_link);
+  ErrorOr<const Elf_Shdr *> SymTableOrErr = getSection(Sec->sh_link);
+  if (std::error_code EC = SymTableOrErr.getError())
+    report_fatal_error(EC.message());
+  const Elf_Shdr *SymTable = *SymTableOrErr;
   return std::make_pair(
       SymTable, getEntry<Elf_Sym>(SymTable, Rel->getSymbol(isMips64EL())));
 }
 
-// Verify that the last byte in the string table in a null.
-template <class ELFT>
-void ELFFile<ELFT>::VerifyStrTab(const Elf_Shdr *sh) const {
-  const char *strtab = (const char *)base() + sh->sh_offset;
-  if (strtab[sh->sh_size - 1] != 0)
-    // FIXME: Proper error handling.
-    report_fatal_error("String table must end with a null terminator!");
-}
-
 template <class ELFT>
 uint64_t ELFFile<ELFT>::getNumSections() const {
   assert(Header && "Header not initialized!");
@@ -621,11 +567,7 @@ typename ELFFile<ELFT>::uintX_t ELFFile<ELFT>::getStringTableIndex() const {
 
 template <class ELFT>
 ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
-    : Buf(Object), SectionHeaderTable(nullptr), dot_shstrtab_sec(nullptr),
-      dot_strtab_sec(nullptr), dot_symtab_sec(nullptr),
-      SymbolTableSectionHeaderIndex(nullptr), dot_gnu_version_sec(nullptr),
-      dot_gnu_version_r_sec(nullptr), dot_gnu_version_d_sec(nullptr),
-      dt_soname(nullptr) {
+    : Buf(Object) {
   const uint64_t FileSize = Buf.size();
 
   if (sizeof(Elf_Ehdr) > FileSize) {
@@ -670,28 +612,35 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
       }
       SymbolTableSectionHeaderIndex = &Sec;
       break;
-    case ELF::SHT_SYMTAB:
+    case ELF::SHT_SYMTAB: {
       if (dot_symtab_sec) {
         // More than one .symtab!
         EC = object_error::parse_failed;
         return;
       }
       dot_symtab_sec = &Sec;
-      dot_strtab_sec = getSection(Sec.sh_link);
-      break;
+      ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link);
+      if ((EC = SectionOrErr.getError()))
+        return;
+      ErrorOr<StringRef> SymtabOrErr = getStringTable(*SectionOrErr);
+      if ((EC = SymtabOrErr.getError()))
+        return;
+      DotStrtab = *SymtabOrErr;
+    } break;
     case ELF::SHT_DYNSYM: {
-      if (DynSymRegion.Addr) {
+      if (DotDynSymSec) {
         // More than one .dynsym!
         EC = object_error::parse_failed;
         return;
       }
-      DynSymRegion.Addr = base() + Sec.sh_offset;
-      DynSymRegion.Size = Sec.sh_size;
-      DynSymRegion.EntSize = Sec.sh_entsize;
-      const Elf_Shdr *DynStr = getSection(Sec.sh_link);
-      DynStrRegion.Addr = base() + DynStr->sh_offset;
-      DynStrRegion.Size = DynStr->sh_size;
-      DynStrRegion.EntSize = DynStr->sh_entsize;
+      DotDynSymSec = &Sec;
+      ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link);
+      if ((EC = SectionOrErr.getError()))
+        return;
+      ErrorOr<StringRef> SymtabOrErr = getStringTable(*SectionOrErr);
+      if ((EC = SymtabOrErr.getError()))
+        return;
+      DynSymStrTab = *SymtabOrErr;
       break;
     }
     case ELF::SHT_DYNAMIC:
@@ -732,27 +681,29 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
   }
 
   // Get string table sections.
-  dot_shstrtab_sec = getSection(getStringTableIndex());
-  if (dot_shstrtab_sec) {
-    // Verify that the last byte in the string table in a null.
-    VerifyStrTab(dot_shstrtab_sec);
-  }
+  ErrorOr<const Elf_Shdr *> StrTabSecOrErr = getSection(getStringTableIndex());
+  if ((EC = StrTabSecOrErr.getError()))
+    return;
+
+  ErrorOr<StringRef> SymtabOrErr = getStringTable(*StrTabSecOrErr);
+  if ((EC = SymtabOrErr.getError()))
+    return;
+  DotShstrtab = *SymtabOrErr;
 
   // Build symbol name side-mapping if there is one.
   if (SymbolTableSectionHeaderIndex) {
     const Elf_Word *ShndxTable = reinterpret_cast<const Elf_Word*>(base() +
                                       SymbolTableSectionHeaderIndex->sh_offset);
-    for (Elf_Sym_Iter SI = begin_symbols(), SE = end_symbols(); SI != SE;
-         ++SI) {
+    for (const Elf_Sym &S : symbols()) {
       if (*ShndxTable != ELF::SHN_UNDEF)
-        ExtendedSymbolTable[&*SI] = *ShndxTable;
+        ExtendedSymbolTable[&S] = *ShndxTable;
       ++ShndxTable;
     }
   }
 
   // Scan program headers.
-  for (Elf_Phdr_Iter PhdrI = begin_program_headers(),
-                     PhdrE = end_program_headers();
+  for (Elf_Phdr_Iter PhdrI = program_header_begin(),
+                     PhdrE = program_header_end();
        PhdrI != PhdrE; ++PhdrI) {
     if (PhdrI->p_type == ELF::PT_DYNAMIC) {
       DynamicRegion.Addr = base() + PhdrI->p_offset;
@@ -762,55 +713,74 @@ ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
     }
   }
 
-  EC = std::error_code();
-}
+  // Scan dynamic table.
+  for (Elf_Dyn_Iter DynI = dynamic_table_begin(), DynE = dynamic_table_end();
+       DynI != DynE; ++DynI) {
+    switch (DynI->d_tag) {
+    case ELF::DT_RELA: {
+      uint64_t VBase = 0;
+      const uint8_t *FBase = nullptr;
+      for (Elf_Phdr_Iter PhdrI = program_header_begin(),
+                         PhdrE = program_header_end();
+           PhdrI != PhdrE; ++PhdrI) {
+        if (PhdrI->p_type != ELF::PT_LOAD)
+          continue;
+        if (DynI->getPtr() >= PhdrI->p_vaddr &&
+            DynI->getPtr() < PhdrI->p_vaddr + PhdrI->p_memsz) {
+          VBase = PhdrI->p_vaddr;
+          FBase = base() + PhdrI->p_offset;
+          break;
+        }
+      }
+      if (!VBase)
+        return;
+      DynRelaRegion.Addr = FBase + DynI->getPtr() - VBase;
+      break;
+    }
+    case ELF::DT_RELASZ:
+      DynRelaRegion.Size = DynI->getVal();
+      break;
+    case ELF::DT_RELAENT:
+      DynRelaRegion.EntSize = DynI->getVal();
+    }
+  }
 
-// Get the symbol table index in the symtab section given a symbol
-template <class ELFT>
-uint64_t ELFFile<ELFT>::getSymbolIndex(const Elf_Sym *Sym) const {
-  uintptr_t SymLoc = uintptr_t(Sym);
-  uintptr_t SymTabLoc = uintptr_t(base() + dot_symtab_sec->sh_offset);
-  assert(SymLoc > SymTabLoc && "Symbol not in symbol table!");
-  uint64_t SymOffset = SymLoc - SymTabLoc;
-  assert(SymOffset % dot_symtab_sec->sh_entsize == 0 &&
-         "Symbol not multiple of symbol size!");
-  return SymOffset / dot_symtab_sec->sh_entsize;
+  EC = std::error_code();
 }
 
 template <class ELFT>
-typename ELFFile<ELFT>::Elf_Shdr_Iter ELFFile<ELFT>::begin_sections() const {
-  return Elf_Shdr_Iter(Header->e_shentsize,
-                       (const char *)base() + Header->e_shoff);
+const typename ELFFile<ELFT>::Elf_Shdr *ELFFile<ELFT>::section_begin() const {
+  if (Header->e_shentsize != sizeof(Elf_Shdr))
+    report_fatal_error(
+        "Invalid section header entry size (e_shentsize) in ELF header");
+  return reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
 }
 
 template <class ELFT>
-typename ELFFile<ELFT>::Elf_Shdr_Iter ELFFile<ELFT>::end_sections() const {
-  return Elf_Shdr_Iter(Header->e_shentsize,
-                       (const char *)base() + Header->e_shoff +
-                           (getNumSections() * Header->e_shentsize));
+const typename ELFFile<ELFT>::Elf_Shdr *ELFFile<ELFT>::section_end() const {
+  return section_begin() + getNumSections();
 }
 
 template <class ELFT>
-typename ELFFile<ELFT>::Elf_Sym_Iter ELFFile<ELFT>::begin_symbols() const {
+const typename ELFFile<ELFT>::Elf_Sym *ELFFile<ELFT>::symbol_begin() const {
   if (!dot_symtab_sec)
-    return Elf_Sym_Iter(0, nullptr, false);
-  return Elf_Sym_Iter(dot_symtab_sec->sh_entsize,
-                      (const char *)base() + dot_symtab_sec->sh_offset, false);
+    return nullptr;
+  if (dot_symtab_sec->sh_entsize != sizeof(Elf_Sym))
+    report_fatal_error("Invalid symbol size");
+  return reinterpret_cast<const Elf_Sym *>(base() + dot_symtab_sec->sh_offset);
 }
 
 template <class ELFT>
-typename ELFFile<ELFT>::Elf_Sym_Iter ELFFile<ELFT>::end_symbols() const {
+const typename ELFFile<ELFT>::Elf_Sym *ELFFile<ELFT>::symbol_end() const {
   if (!dot_symtab_sec)
-    return Elf_Sym_Iter(0, nullptr, false);
-  return Elf_Sym_Iter(dot_symtab_sec->sh_entsize,
-                      (const char *)base() + dot_symtab_sec->sh_offset +
-                          dot_symtab_sec->sh_size,
-                      false);
+    return nullptr;
+  return reinterpret_cast<const Elf_Sym *>(base() + dot_symtab_sec->sh_offset +
+                                           dot_symtab_sec->sh_size);
 }
 
 template <class ELFT>
 typename ELFFile<ELFT>::Elf_Dyn_Iter
-ELFFile<ELFT>::begin_dynamic_table() const {
+ELFFile<ELFT>::dynamic_table_begin() const {
   if (DynamicRegion.Addr)
     return Elf_Dyn_Iter(DynamicRegion.EntSize,
                         (const char *)DynamicRegion.Addr);
@@ -819,14 +789,14 @@ ELFFile<ELFT>::begin_dynamic_table() const {
 
 template <class ELFT>
 typename ELFFile<ELFT>::Elf_Dyn_Iter
-ELFFile<ELFT>::end_dynamic_table(bool NULLEnd) const {
+ELFFile<ELFT>::dynamic_table_end(bool NULLEnd) const {
   if (!DynamicRegion.Addr)
     return Elf_Dyn_Iter(0, nullptr);
   Elf_Dyn_Iter Ret(DynamicRegion.EntSize,
                     (const char *)DynamicRegion.Addr + DynamicRegion.Size);
 
   if (NULLEnd) {
-    Elf_Dyn_Iter Start = begin_dynamic_table();
+    Elf_Dyn_Iter Start = dynamic_table_begin();
     while (Start != Ret && Start->getTag() != ELF::DT_NULL)
       ++Start;
 
@@ -855,7 +825,10 @@ StringRef ELFFile<ELFT>::getLoadName() const {
 template <class ELFT>
 template <typename T>
 const T *ELFFile<ELFT>::getEntry(uint32_t Section, uint32_t Entry) const {
-  return getEntry<T>(getSection(Section), Entry);
+  ErrorOr<const Elf_Shdr *> Sec = getSection(Section);
+  if (std::error_code EC = Sec.getError())
+    report_fatal_error(EC.message());
+  return getEntry<T>(*Sec, Entry);
 }
 
 template <class ELFT>
@@ -867,82 +840,85 @@ const T *ELFFile<ELFT>::getEntry(const Elf_Shdr *Section,
 }
 
 template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Shdr *
-ELFFile<ELFT>::getSection(uint32_t index) const {
-  if (index == 0)
-    return nullptr;
-  if (!SectionHeaderTable || index >= getNumSections())
-    // FIXME: Proper error handling.
-    report_fatal_error("Invalid section index!");
+ErrorOr<const typename ELFFile<ELFT>::Elf_Shdr *>
+ELFFile<ELFT>::getSection(uint32_t Index) const {
+  assert(SectionHeaderTable && "SectionHeaderTable not initialized!");
+  if (Index >= getNumSections())
+    return object_error::invalid_section_index;
 
   return reinterpret_cast<const Elf_Shdr *>(
-         reinterpret_cast<const char *>(SectionHeaderTable)
-         + (index * Header->e_shentsize));
+      reinterpret_cast<const char *>(SectionHeaderTable) +
+      (Index * Header->e_shentsize));
 }
 
 template <class ELFT>
-const char *ELFFile<ELFT>::getString(uint32_t section,
-                                     ELF::Elf32_Word offset) const {
-  return getString(getSection(section), offset);
-}
-
-template <class ELFT>
-const char *ELFFile<ELFT>::getString(const Elf_Shdr *section,
-                                     ELF::Elf32_Word offset) const {
-  assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
-  if (offset >= section->sh_size)
-    // FIXME: Proper error handling.
-    report_fatal_error("Symbol name offset outside of string table!");
-  return (const char *)base() + section->sh_offset + offset;
+ErrorOr<StringRef>
+ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const {
+  if (Section->sh_type != ELF::SHT_STRTAB)
+    return object_error::parse_failed;
+  uint64_t Offset = Section->sh_offset;
+  uint64_t Size = Section->sh_size;
+  if (Offset + Size > Buf.size())
+    return object_error::parse_failed;
+  StringRef Data((const char *)base() + Section->sh_offset, Size);
+  if (Data[Size - 1] != '\0')
+    return object_error::string_table_non_null_end;
+  return Data;
 }
 
 template <class ELFT>
 const char *ELFFile<ELFT>::getDynamicString(uintX_t Offset) const {
-  if (!DynStrRegion.Addr || Offset >= DynStrRegion.Size)
+  if (!DotDynSymSec || Offset >= DynSymStrTab.size())
     return nullptr;
-  return (const char *)DynStrRegion.Addr + Offset;
+  return (const char *)DynSymStrTab.begin() + Offset;
 }
 
 template <class ELFT>
-ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(Elf_Sym_Iter Sym) const {
-  if (!Sym.isDynamic())
-    return getSymbolName(dot_symtab_sec, &*Sym);
-
-  if (!DynStrRegion.Addr || Sym->st_name >= DynStrRegion.Size)
-    return object_error::parse_failed;
-  return StringRef(getDynamicString(Sym->st_name));
+ErrorOr<StringRef>
+ELFFile<ELFT>::getStaticSymbolName(const Elf_Sym *Symb) const {
+  return Symb->getName(DotStrtab);
 }
 
 template <class ELFT>
-ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(const Elf_Shdr *Section,
-                                                const Elf_Sym *Symb) const {
-  if (Symb->st_name == 0)
-    return StringRef("");
+ErrorOr<StringRef>
+ELFFile<ELFT>::getDynamicSymbolName(const Elf_Sym *Symb) const {
+  return StringRef(getDynamicString(Symb->st_name));
+}
 
-  const Elf_Shdr *StrTab = getSection(Section->sh_link);
-  if (Symb->st_name >= StrTab->sh_size)
-    return object_error::parse_failed;
-  return StringRef(getString(StrTab, Symb->st_name));
+template <class ELFT>
+ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(const Elf_Sym *Symb,
+                                                bool IsDynamic) const {
+  if (IsDynamic)
+    return getDynamicSymbolName(Symb);
+  return getStaticSymbolName(Symb);
 }
 
 template <class ELFT>
 ErrorOr<StringRef>
 ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section) const {
-  if (Section->sh_name >= dot_shstrtab_sec->sh_size)
+  uint32_t Offset = Section->sh_name;
+  if (Offset >= DotShstrtab.size())
     return object_error::parse_failed;
-  return StringRef(getString(dot_shstrtab_sec, Section->sh_name));
+  return StringRef(DotShstrtab.data() + Offset);
 }
 
 template <class ELFT>
 ErrorOr<StringRef> ELFFile<ELFT>::getSymbolVersion(const Elf_Shdr *section,
                                                    const Elf_Sym *symb,
                                                    bool &IsDefault) const {
+  StringRef StrTab;
+  if (section) {
+    ErrorOr<StringRef> StrTabOrErr = getStringTable(section);
+    if (std::error_code EC = StrTabOrErr.getError())
+      return EC;
+    StrTab = *StrTabOrErr;
+  }
   // Handle non-dynamic symbols.
-  if (section != DynSymRegion.Addr && section != nullptr) {
+  if (section != DotDynSymSec && section != nullptr) {
     // Non-dynamic symbols can have versions in their names
     // A name of the form 'foo@V1' indicates version 'V1', non-default.
     // A name of the form 'foo@@V2' indicates version 'V2', default version.
-    ErrorOr<StringRef> SymName = getSymbolName(section, symb);
+    ErrorOr<StringRef> SymName = symb->getName(StrTab);
     if (!SymName)
       return SymName;
     StringRef Name = *SymName;
@@ -969,8 +945,10 @@ ErrorOr<StringRef> ELFFile<ELFT>::getSymbolVersion(const Elf_Shdr *section,
   }
 
   // Determine the position in the symbol table of this entry.
-  size_t entry_index = ((const char *)symb - (const char *)DynSymRegion.Addr) /
-                       DynSymRegion.EntSize;
+  size_t entry_index =
+      (reinterpret_cast<uintptr_t>(symb) - DotDynSymSec->sh_offset -
+       reinterpret_cast<uintptr_t>(base())) /
+      sizeof(Elf_Sym);
 
   // Get the corresponding version index entry
   const Elf_Versym *vs = getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index);
@@ -1005,7 +983,7 @@ ErrorOr<StringRef> ELFFile<ELFT>::getSymbolVersion(const Elf_Shdr *section,
     IsDefault = false;
   }
 
-  if (name_offset >= DynStrRegion.Size)
+  if (name_offset >= DynSymStrTab.size())
     return object_error::parse_failed;
   return StringRef(getDynamicString(name_offset));
 }
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index 7fc56ad..5b9b113 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -35,27 +35,148 @@
 namespace llvm {
 namespace object {
 
+class elf_symbol_iterator;
+class ELFSymbolRef;
+class ELFRelocationRef;
+
 class ELFObjectFileBase : public ObjectFile {
+  friend class ELFSymbolRef;
+  friend class ELFSectionRef;
+  friend class ELFRelocationRef;
+
 protected:
   ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
 
-public:
-  virtual ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0;
+  virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
+  virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0;
+  virtual uint8_t getSymbolELFType(DataRefImpl Symb) const = 0;
 
-  // FIXME: This is a bit of a hack. Every caller should know if it expecting
-  // and addend or not.
-  virtual bool hasRelocationAddend(DataRefImpl Rel) const = 0;
+  virtual uint32_t getSectionType(DataRefImpl Sec) const = 0;
+  virtual uint64_t getSectionFlags(DataRefImpl Sec) const = 0;
 
-  virtual std::pair<symbol_iterator, symbol_iterator>
-  getELFDynamicSymbolIterators() const = 0;
+  virtual ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0;
+public:
 
-  virtual uint64_t getSectionFlags(SectionRef Sec) const = 0;
-  virtual uint32_t getSectionType(SectionRef Sec) const = 0;
+  typedef iterator_range<elf_symbol_iterator> elf_symbol_iterator_range;
+  virtual elf_symbol_iterator_range getDynamicSymbolIterators() const = 0;
+
+  elf_symbol_iterator_range symbols() const;
 
   static inline bool classof(const Binary *v) { return v->isELF(); }
 };
 
+class ELFSectionRef : public SectionRef {
+public:
+  ELFSectionRef(const SectionRef &B) : SectionRef(B) {
+    assert(isa<ELFObjectFileBase>(SectionRef::getObject()));
+  }
+
+  const ELFObjectFileBase *getObject() const {
+    return cast<ELFObjectFileBase>(SectionRef::getObject());
+  }
+
+  uint32_t getType() const {
+    return getObject()->getSectionType(getRawDataRefImpl());
+  }
+
+  uint64_t getFlags() const {
+    return getObject()->getSectionFlags(getRawDataRefImpl());
+  }
+};
+
+class elf_section_iterator : public section_iterator {
+public:
+  elf_section_iterator(const section_iterator &B) : section_iterator(B) {
+    assert(isa<ELFObjectFileBase>(B->getObject()));
+  }
+
+  const ELFSectionRef *operator->() const {
+    return static_cast<const ELFSectionRef *>(section_iterator::operator->());
+  }
+
+  const ELFSectionRef &operator*() const {
+    return static_cast<const ELFSectionRef &>(section_iterator::operator*());
+  }
+};
+
+class ELFSymbolRef : public SymbolRef {
+public:
+  ELFSymbolRef(const SymbolRef &B) : SymbolRef(B) {
+    assert(isa<ELFObjectFileBase>(SymbolRef::getObject()));
+  }
+
+  const ELFObjectFileBase *getObject() const {
+    return cast<ELFObjectFileBase>(BasicSymbolRef::getObject());
+  }
+
+  uint64_t getSize() const {
+    return getObject()->getSymbolSize(getRawDataRefImpl());
+  }
+
+  uint8_t getOther() const {
+    return getObject()->getSymbolOther(getRawDataRefImpl());
+  }
+
+  uint8_t getELFType() const {
+    return getObject()->getSymbolELFType(getRawDataRefImpl());
+  }
+};
+
+class elf_symbol_iterator : public symbol_iterator {
+public:
+  elf_symbol_iterator(const basic_symbol_iterator &B)
+      : symbol_iterator(SymbolRef(B->getRawDataRefImpl(),
+                                  cast<ELFObjectFileBase>(B->getObject()))) {}
+
+  const ELFSymbolRef *operator->() const {
+    return static_cast<const ELFSymbolRef *>(symbol_iterator::operator->());
+  }
+
+  const ELFSymbolRef &operator*() const {
+    return static_cast<const ELFSymbolRef &>(symbol_iterator::operator*());
+  }
+};
+
+class ELFRelocationRef : public RelocationRef {
+public:
+  ELFRelocationRef(const RelocationRef &B) : RelocationRef(B) {
+    assert(isa<ELFObjectFileBase>(RelocationRef::getObject()));
+  }
+
+  const ELFObjectFileBase *getObject() const {
+    return cast<ELFObjectFileBase>(RelocationRef::getObject());
+  }
+
+  ErrorOr<int64_t> getAddend() const {
+    return getObject()->getRelocationAddend(getRawDataRefImpl());
+  }
+};
+
+class elf_relocation_iterator : public relocation_iterator {
+public:
+  elf_relocation_iterator(const relocation_iterator &B)
+      : relocation_iterator(RelocationRef(
+            B->getRawDataRefImpl(), cast<ELFObjectFileBase>(B->getObject()))) {}
+
+  const ELFRelocationRef *operator->() const {
+    return static_cast<const ELFRelocationRef *>(
+        relocation_iterator::operator->());
+  }
+
+  const ELFRelocationRef &operator*() const {
+    return static_cast<const ELFRelocationRef &>(
+        relocation_iterator::operator*());
+  }
+};
+
+inline ELFObjectFileBase::elf_symbol_iterator_range
+ELFObjectFileBase::symbols() const {
+  return elf_symbol_iterator_range(symbol_begin(), symbol_end());
+}
+
 template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
+  uint64_t getSymbolSize(DataRefImpl Sym) const override;
+
 public:
   LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
 
@@ -68,24 +189,22 @@ public:
   typedef typename ELFFile<ELFT>::Elf_Rela Elf_Rela;
   typedef typename ELFFile<ELFT>::Elf_Dyn Elf_Dyn;
 
-  typedef typename ELFFile<ELFT>::Elf_Sym_Iter Elf_Sym_Iter;
-  typedef typename ELFFile<ELFT>::Elf_Shdr_Iter Elf_Shdr_Iter;
   typedef typename ELFFile<ELFT>::Elf_Dyn_Iter Elf_Dyn_Iter;
 
 protected:
   ELFFile<ELFT> EF;
 
   void moveSymbolNext(DataRefImpl &Symb) const override;
-  std::error_code getSymbolName(DataRefImpl Symb,
-                                StringRef &Res) const override;
+  ErrorOr<StringRef> getSymbolName(DataRefImpl Symb) const override;
   std::error_code getSymbolAddress(DataRefImpl Symb,
                                    uint64_t &Res) const override;
+  uint64_t getSymbolValue(DataRefImpl Symb) const override;
   uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
-  uint64_t getSymbolSize(DataRefImpl Symb) const override;
+  uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
   uint32_t getSymbolFlags(DataRefImpl Symb) const override;
-  std::error_code getSymbolOther(DataRefImpl Symb, uint8_t &Res) const override;
-  std::error_code getSymbolType(DataRefImpl Symb,
-                                SymbolRef::Type &Res) const override;
+  uint8_t getSymbolOther(DataRefImpl Symb) const override;
+  uint8_t getSymbolELFType(DataRefImpl Symb) const override;
+  SymbolRef::Type getSymbolType(DataRefImpl Symb) const override;
   section_iterator getSymbolSection(const Elf_Sym *Symb) const;
   std::error_code getSymbolSection(DataRefImpl Symb,
                                    section_iterator &Res) const override;
@@ -102,62 +221,55 @@ protected:
   bool isSectionData(DataRefImpl Sec) const override;
   bool isSectionBSS(DataRefImpl Sec) const override;
   bool isSectionVirtual(DataRefImpl Sec) const override;
-  bool sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb) const override;
   relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
   relocation_iterator section_rel_end(DataRefImpl Sec) const override;
   section_iterator getRelocatedSection(DataRefImpl Sec) const override;
 
   void moveRelocationNext(DataRefImpl &Rel) const override;
-  std::error_code getRelocationAddress(DataRefImpl Rel,
-                                       uint64_t &Res) const override;
-  std::error_code getRelocationOffset(DataRefImpl Rel,
-                                      uint64_t &Res) const override;
+  ErrorOr<uint64_t> getRelocationAddress(DataRefImpl Rel) const override;
+  uint64_t getRelocationOffset(DataRefImpl Rel) const override;
   symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
-  std::error_code getRelocationType(DataRefImpl Rel,
-                                    uint64_t &Res) const override;
-  std::error_code
-  getRelocationTypeName(DataRefImpl Rel,
-                        SmallVectorImpl<char> &Result) const override;
+  uint64_t getRelocationType(DataRefImpl Rel) const override;
+  void getRelocationTypeName(DataRefImpl Rel,
+                             SmallVectorImpl<char> &Result) const override;
 
+  uint32_t getSectionType(DataRefImpl Sec) const override;
+  uint64_t getSectionFlags(DataRefImpl Sec) const override;
   uint64_t getROffset(DataRefImpl Rel) const;
   StringRef getRelocationTypeName(uint32_t Type) const;
 
   /// \brief Get the relocation section that contains \a Rel.
   const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
-    return EF.getSection(Rel.d.a);
+    return *EF.getSection(Rel.d.a);
   }
 
-  const Elf_Rel *getRel(DataRefImpl Rel) const;
-  const Elf_Rela *getRela(DataRefImpl Rela) const;
-
-  Elf_Sym_Iter toELFSymIter(DataRefImpl Symb) const {
-    bool IsDynamic = Symb.p & 1;
-    if (IsDynamic)
-      return Elf_Sym_Iter(
-          EF.begin_dynamic_symbols().getEntSize(),
-          reinterpret_cast<const char *>(Symb.p & ~uintptr_t(1)), IsDynamic);
-    return Elf_Sym_Iter(EF.begin_symbols().getEntSize(),
-                        reinterpret_cast<const char *>(Symb.p), IsDynamic);
+  const Elf_Sym *toELFSymIter(DataRefImpl Sym) const {
+    return EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b);
   }
 
-  DataRefImpl toDRI(Elf_Sym_Iter Symb) const {
+  DataRefImpl toDRI(const Elf_Shdr *SymTable, unsigned SymbolNum) const {
     DataRefImpl DRI;
-    DRI.p = reinterpret_cast<uintptr_t>(Symb.get()) |
-      static_cast<uintptr_t>(Symb.isDynamic());
-    return DRI;
-  }
+    if (!SymTable) {
+      DRI.d.a = 0;
+      DRI.d.b = 0;
+      return DRI;
+    }
+    assert(SymTable->sh_type == ELF::SHT_SYMTAB ||
+           SymTable->sh_type == ELF::SHT_DYNSYM);
 
-  Elf_Shdr_Iter toELFShdrIter(DataRefImpl Sec) const {
-    return Elf_Shdr_Iter(EF.getHeader()->e_shentsize,
-                         reinterpret_cast<const char *>(Sec.p));
-  }
+    uintptr_t SHT = reinterpret_cast<uintptr_t>(EF.section_begin());
+    unsigned SymTableIndex =
+        (reinterpret_cast<uintptr_t>(SymTable) - SHT) / sizeof(Elf_Shdr);
 
-  DataRefImpl toDRI(Elf_Shdr_Iter Sec) const {
-    DataRefImpl DRI;
-    DRI.p = reinterpret_cast<uintptr_t>(Sec.get());
+    DRI.d.a = SymTableIndex;
+    DRI.d.b = SymbolNum;
     return DRI;
   }
 
+  const Elf_Shdr *toELFShdrIter(DataRefImpl Sec) const {
+    return reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  }
+
   DataRefImpl toDRI(const Elf_Shdr *Sec) const {
     DataRefImpl DRI;
     DRI.p = reinterpret_cast<uintptr_t>(Sec);
@@ -197,22 +309,21 @@ protected:
 public:
   ELFObjectFile(MemoryBufferRef Object, std::error_code &EC);
 
+  const Elf_Rel *getRel(DataRefImpl Rel) const;
+  const Elf_Rela *getRela(DataRefImpl Rela) const;
+
   const Elf_Sym *getSymbol(DataRefImpl Symb) const;
 
   basic_symbol_iterator symbol_begin_impl() const override;
   basic_symbol_iterator symbol_end_impl() const override;
 
-  symbol_iterator dynamic_symbol_begin() const;
-  symbol_iterator dynamic_symbol_end() const;
+  elf_symbol_iterator dynamic_symbol_begin() const;
+  elf_symbol_iterator dynamic_symbol_end() const;
 
   section_iterator section_begin() const override;
   section_iterator section_end() const override;
 
   ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const override;
-  bool hasRelocationAddend(DataRefImpl Rel) const override;
-
-  uint64_t getSectionFlags(SectionRef Sec) const override;
-  uint32_t getSectionType(SectionRef Sec) const override;
 
   uint8_t getBytesInAddress() const override;
   StringRef getFileFormatName() const override;
@@ -232,8 +343,7 @@ public:
                                       ELFT::Is64Bits);
   }
 
-  std::pair<symbol_iterator, symbol_iterator>
-  getELFDynamicSymbolIterators() const override;
+  elf_symbol_iterator_range getDynamicSymbolIterators() const override;
 
   bool isRelocatableObject() const override;
 };
@@ -244,59 +354,71 @@ typedef ELFObjectFile<ELFType<support::big, false>> ELF32BEObjectFile;
 typedef ELFObjectFile<ELFType<support::big, true>> ELF64BEObjectFile;
 
 template <class ELFT>
-void ELFObjectFile<ELFT>::moveSymbolNext(DataRefImpl &Symb) const {
-  Symb = toDRI(++toELFSymIter(Symb));
+void ELFObjectFile<ELFT>::moveSymbolNext(DataRefImpl &Sym) const {
+  ++Sym.d.b;
 }
 
 template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Symb,
-                                                   StringRef &Result) const {
-  ErrorOr<StringRef> Name = EF.getSymbolName(toELFSymIter(Symb));
-  if (!Name)
-    return Name.getError();
-  Result = *Name;
-  return std::error_code();
+ErrorOr<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
+  const Elf_Sym *ESym = toELFSymIter(Sym);
+  const Elf_Shdr *SymTableSec = *EF.getSection(Sym.d.a);
+  const Elf_Shdr *StringTableSec = *EF.getSection(SymTableSec->sh_link);
+  StringRef SymTable = *EF.getStringTable(StringTableSec);
+  return ESym->getName(SymTable);
 }
 
 template <class ELFT>
-uint64_t ELFObjectFile<ELFT>::getSectionFlags(SectionRef Sec) const {
-  DataRefImpl DRI = Sec.getRawDataRefImpl();
-  return toELFShdrIter(DRI)->sh_flags;
+uint64_t ELFObjectFile<ELFT>::getSectionFlags(DataRefImpl Sec) const {
+  return toELFShdrIter(Sec)->sh_flags;
 }
 
 template <class ELFT>
-uint32_t ELFObjectFile<ELFT>::getSectionType(SectionRef Sec) const {
-  DataRefImpl DRI = Sec.getRawDataRefImpl();
-  return toELFShdrIter(DRI)->sh_type;
+uint32_t ELFObjectFile<ELFT>::getSectionType(DataRefImpl Sec) const {
+  return toELFShdrIter(Sec)->sh_type;
 }
 
 template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
-                                                      uint64_t &Result) const {
+uint64_t ELFObjectFile<ELFT>::getSymbolValue(DataRefImpl Symb) const {
   const Elf_Sym *ESym = getSymbol(Symb);
-  switch (EF.getSymbolTableIndex(ESym)) {
+  switch (ESym->st_shndx) {
   case ELF::SHN_COMMON:
   case ELF::SHN_UNDEF:
-    Result = UnknownAddressOrSize;
-    return std::error_code();
+    return UnknownAddress;
   case ELF::SHN_ABS:
-    Result = ESym->st_value;
-    return std::error_code();
-  default:
-    break;
+    return ESym->st_value;
   }
 
   const Elf_Ehdr *Header = EF.getHeader();
-  Result = ESym->st_value;
+  uint64_t Ret = ESym->st_value;
 
   // Clear the ARM/Thumb or microMIPS indicator flag.
   if ((Header->e_machine == ELF::EM_ARM || Header->e_machine == ELF::EM_MIPS) &&
       ESym->getType() == ELF::STT_FUNC)
-    Result &= ~1;
+    Ret &= ~1;
+
+  return Ret;
+}
+
+template <class ELFT>
+std::error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
+                                                      uint64_t &Result) const {
+  Result = getSymbolValue(Symb);
+  const Elf_Sym *ESym = getSymbol(Symb);
+  switch (ESym->st_shndx) {
+  case ELF::SHN_COMMON:
+  case ELF::SHN_UNDEF:
+  case ELF::SHN_ABS:
+    return std::error_code();
+  }
+
+  const Elf_Ehdr *Header = EF.getHeader();
 
   if (Header->e_type == ELF::ET_REL) {
-    const typename ELFFile<ELFT>::Elf_Shdr * Section = EF.getSection(ESym);
-    if (Section != nullptr)
+    ErrorOr<const Elf_Shdr *> SectionOrErr = EF.getSection(ESym);
+    if (std::error_code EC = SectionOrErr.getError())
+      return EC;
+    const Elf_Shdr *Section = *SectionOrErr;
+    if (Section)
       Result += Section->sh_addr;
   }
 
@@ -305,59 +427,57 @@ std::error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
 
 template <class ELFT>
 uint32_t ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb) const {
-  Elf_Sym_Iter Sym = toELFSymIter(Symb);
+  const Elf_Sym *Sym = toELFSymIter(Symb);
   if (Sym->st_shndx == ELF::SHN_COMMON)
     return Sym->st_value;
   return 0;
 }
 
 template <class ELFT>
-uint64_t ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Symb) const {
+uint64_t ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Sym) const {
+  return toELFSymIter(Sym)->st_size;
+}
+
+template <class ELFT>
+uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
   return toELFSymIter(Symb)->st_size;
 }
 
 template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb,
-                                                    uint8_t &Result) const {
-  Result = toELFSymIter(Symb)->st_other;
-  return std::error_code();
+uint8_t ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb) const {
+  return toELFSymIter(Symb)->st_other;
 }
 
 template <class ELFT>
-std::error_code
-ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb,
-                                   SymbolRef::Type &Result) const {
+uint8_t ELFObjectFile<ELFT>::getSymbolELFType(DataRefImpl Symb) const {
+  return toELFSymIter(Symb)->getType();
+}
+
+template <class ELFT>
+SymbolRef::Type ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb) const {
   const Elf_Sym *ESym = getSymbol(Symb);
 
   switch (ESym->getType()) {
   case ELF::STT_NOTYPE:
-    Result = SymbolRef::ST_Unknown;
-    break;
+    return SymbolRef::ST_Unknown;
   case ELF::STT_SECTION:
-    Result = SymbolRef::ST_Debug;
-    break;
+    return SymbolRef::ST_Debug;
   case ELF::STT_FILE:
-    Result = SymbolRef::ST_File;
-    break;
+    return SymbolRef::ST_File;
   case ELF::STT_FUNC:
-    Result = SymbolRef::ST_Function;
-    break;
+    return SymbolRef::ST_Function;
   case ELF::STT_OBJECT:
   case ELF::STT_COMMON:
   case ELF::STT_TLS:
-    Result = SymbolRef::ST_Data;
-    break;
+    return SymbolRef::ST_Data;
   default:
-    Result = SymbolRef::ST_Other;
-    break;
+    return SymbolRef::ST_Other;
   }
-  return std::error_code();
 }
 
 template <class ELFT>
-uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Symb) const {
-  Elf_Sym_Iter EIter = toELFSymIter(Symb);
-  const Elf_Sym *ESym = &*EIter;
+uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
+  const Elf_Sym *ESym = toELFSymIter(Sym);
 
   uint32_t Result = SymbolRef::SF_None;
 
@@ -371,14 +491,22 @@ uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Symb) const {
     Result |= SymbolRef::SF_Absolute;
 
   if (ESym->getType() == ELF::STT_FILE || ESym->getType() == ELF::STT_SECTION ||
-      EIter == EF.begin_symbols() || EIter == EF.begin_dynamic_symbols())
+      ESym == EF.symbol_begin() || ESym == EF.dynamic_symbol_begin())
     Result |= SymbolRef::SF_FormatSpecific;
 
-  if (EF.getSymbolTableIndex(ESym) == ELF::SHN_UNDEF)
+  if (EF.getHeader()->e_machine == ELF::EM_ARM) {
+    if (ErrorOr<StringRef> NameOrErr = getSymbolName(Sym)) {
+      StringRef Name = *NameOrErr;
+      if (Name.startswith("$d") || Name.startswith("$t") ||
+          Name.startswith("$a"))
+        Result |= SymbolRef::SF_FormatSpecific;
+    }
+  }
+
+  if (ESym->st_shndx == ELF::SHN_UNDEF)
     Result |= SymbolRef::SF_Undefined;
 
-  if (ESym->getType() == ELF::STT_COMMON ||
-      EF.getSymbolTableIndex(ESym) == ELF::SHN_COMMON)
+  if (ESym->getType() == ELF::STT_COMMON || ESym->st_shndx == ELF::SHN_COMMON)
     Result |= SymbolRef::SF_Common;
 
   if (isExportedToOtherDSO(ESym))
@@ -393,14 +521,17 @@ uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Symb) const {
 template <class ELFT>
 section_iterator
 ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym) const {
-  const Elf_Shdr *ESec = EF.getSection(ESym);
+  ErrorOr<const Elf_Shdr *> ESecOrErr = EF.getSection(ESym);
+  if (std::error_code EC = ESecOrErr.getError())
+    report_fatal_error(EC.message());
+
+  const Elf_Shdr *ESec = *ESecOrErr;
   if (!ESec)
     return section_end();
-  else {
-    DataRefImpl Sec;
-    Sec.p = reinterpret_cast<intptr_t>(ESec);
-    return section_iterator(SectionRef(Sec, this));
-  }
+
+  DataRefImpl Sec;
+  Sec.p = reinterpret_cast<intptr_t>(ESec);
+  return section_iterator(SectionRef(Sec, this));
 }
 
 template <class ELFT>
@@ -413,7 +544,8 @@ ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb,
 
 template <class ELFT>
 void ELFObjectFile<ELFT>::moveSectionNext(DataRefImpl &Sec) const {
-  Sec = toDRI(++toELFShdrIter(Sec));
+  const Elf_Shdr *ESec = toELFShdrIter(Sec);
+  Sec = toDRI(++ESec);
 }
 
 template <class ELFT>
@@ -440,7 +572,7 @@ template <class ELFT>
 std::error_code
 ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec,
                                         StringRef &Result) const {
-  Elf_Shdr_Iter EShdr = toELFShdrIter(Sec);
+  const Elf_Shdr *EShdr = toELFShdrIter(Sec);
   Result = StringRef((const char *)base() + EShdr->sh_offset, EShdr->sh_size);
   return std::error_code();
 }
@@ -457,14 +589,14 @@ bool ELFObjectFile<ELFT>::isSectionText(DataRefImpl Sec) const {
 
 template <class ELFT>
 bool ELFObjectFile<ELFT>::isSectionData(DataRefImpl Sec) const {
-  Elf_Shdr_Iter EShdr = toELFShdrIter(Sec);
+  const Elf_Shdr *EShdr = toELFShdrIter(Sec);
   return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) &&
          EShdr->sh_type == ELF::SHT_PROGBITS;
 }
 
 template <class ELFT>
 bool ELFObjectFile<ELFT>::isSectionBSS(DataRefImpl Sec) const {
-  Elf_Shdr_Iter EShdr = toELFShdrIter(Sec);
+  const Elf_Shdr *EShdr = toELFShdrIter(Sec);
   return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) &&
          EShdr->sh_type == ELF::SHT_NOBITS;
 }
@@ -475,38 +607,40 @@ bool ELFObjectFile<ELFT>::isSectionVirtual(DataRefImpl Sec) const {
 }
 
 template <class ELFT>
-bool ELFObjectFile<ELFT>::sectionContainsSymbol(DataRefImpl Sec,
-                                                DataRefImpl Symb) const {
-  Elf_Sym_Iter ESym = toELFSymIter(Symb);
-
-  uintX_t Index = ESym->st_shndx;
-  bool Reserved = Index >= ELF::SHN_LORESERVE && Index <= ELF::SHN_HIRESERVE;
-
-  return !Reserved && (&*toELFShdrIter(Sec) == EF.getSection(ESym->st_shndx));
-}
-
-template <class ELFT>
 relocation_iterator
 ELFObjectFile<ELFT>::section_rel_begin(DataRefImpl Sec) const {
   DataRefImpl RelData;
-  uintptr_t SHT = reinterpret_cast<uintptr_t>(EF.begin_sections().get());
+  uintptr_t SHT = reinterpret_cast<uintptr_t>(EF.section_begin());
   RelData.d.a = (Sec.p - SHT) / EF.getHeader()->e_shentsize;
   RelData.d.b = 0;
+
+  const Elf_Shdr *S = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  if (S->sh_type != ELF::SHT_RELA && S->sh_type != ELF::SHT_REL)
+    return relocation_iterator(RelocationRef(RelData, this));
+
+  const Elf_Shdr *RelSec = getRelSection(RelData);
+  ErrorOr<const Elf_Shdr *> SymSecOrErr = EF.getSection(RelSec->sh_link);
+  if (std::error_code EC = SymSecOrErr.getError())
+    report_fatal_error(EC.message());
+  const Elf_Shdr *SymSec = *SymSecOrErr;
+  uint32_t SymSecType = SymSec->sh_type;
+  if (SymSecType != ELF::SHT_SYMTAB && SymSecType != ELF::SHT_DYNSYM)
+    report_fatal_error("Invalid symbol table section type!");
+  if (SymSecType == ELF::SHT_DYNSYM)
+    RelData.d.b = 1;
+
   return relocation_iterator(RelocationRef(RelData, this));
 }
 
 template <class ELFT>
 relocation_iterator
 ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const {
-  DataRefImpl RelData;
-  uintptr_t SHT = reinterpret_cast<uintptr_t>(EF.begin_sections().get());
   const Elf_Shdr *S = reinterpret_cast<const Elf_Shdr *>(Sec.p);
-  RelData.d.a = (Sec.p - SHT) / EF.getHeader()->e_shentsize;
+  relocation_iterator Begin = section_rel_begin(Sec);
   if (S->sh_type != ELF::SHT_RELA && S->sh_type != ELF::SHT_REL)
-    RelData.d.b = 0;
-  else
-    RelData.d.b = S->sh_size / S->sh_entsize;
-
+    return Begin;
+  DataRefImpl RelData = Begin->getRawDataRefImpl();
+  RelData.d.b += (S->sh_size / S->sh_entsize) << 1;
   return relocation_iterator(RelocationRef(RelData, this));
 }
 
@@ -516,19 +650,21 @@ ELFObjectFile<ELFT>::getRelocatedSection(DataRefImpl Sec) const {
   if (EF.getHeader()->e_type != ELF::ET_REL)
     return section_end();
 
-  Elf_Shdr_Iter EShdr = toELFShdrIter(Sec);
+  const Elf_Shdr *EShdr = toELFShdrIter(Sec);
   uintX_t Type = EShdr->sh_type;
   if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA)
     return section_end();
 
-  const Elf_Shdr *R = EF.getSection(EShdr->sh_info);
-  return section_iterator(SectionRef(toDRI(R), this));
+  ErrorOr<const Elf_Shdr *> R = EF.getSection(EShdr->sh_info);
+  if (std::error_code EC = R.getError())
+    report_fatal_error(EC.message());
+  return section_iterator(SectionRef(toDRI(*R), this));
 }
 
 // Relocations
 template <class ELFT>
 void ELFObjectFile<ELFT>::moveRelocationNext(DataRefImpl &Rel) const {
-  ++Rel.d.b;
+  Rel.d.b += 2;
 }
 
 template <class ELFT>
@@ -536,96 +672,62 @@ symbol_iterator
 ELFObjectFile<ELFT>::getRelocationSymbol(DataRefImpl Rel) const {
   uint32_t symbolIdx;
   const Elf_Shdr *sec = getRelSection(Rel);
-  switch (sec->sh_type) {
-  default:
-    report_fatal_error("Invalid section type in Rel!");
-  case ELF::SHT_REL: {
+  if (sec->sh_type == ELF::SHT_REL)
     symbolIdx = getRel(Rel)->getSymbol(EF.isMips64EL());
-    break;
-  }
-  case ELF::SHT_RELA: {
+  else
     symbolIdx = getRela(Rel)->getSymbol(EF.isMips64EL());
-    break;
-  }
-  }
   if (!symbolIdx)
     return symbol_end();
 
-  const Elf_Shdr *SymSec = EF.getSection(sec->sh_link);
-
+  bool IsDyn = Rel.d.b & 1;
   DataRefImpl SymbolData;
-  switch (SymSec->sh_type) {
-  default:
-    report_fatal_error("Invalid symbol table section type!");
-  case ELF::SHT_SYMTAB:
-    SymbolData = toDRI(EF.begin_symbols() + symbolIdx);
-    break;
-  case ELF::SHT_DYNSYM:
-    SymbolData = toDRI(EF.begin_dynamic_symbols() + symbolIdx);
-    break;
-  }
-
+  if (IsDyn)
+    SymbolData = toDRI(EF.getDotDynSymSec(), symbolIdx);
+  else
+    SymbolData = toDRI(EF.getDotSymtabSec(), symbolIdx);
   return symbol_iterator(SymbolRef(SymbolData, this));
 }
 
 template <class ELFT>
-std::error_code
-ELFObjectFile<ELFT>::getRelocationAddress(DataRefImpl Rel,
-                                          uint64_t &Result) const {
+ErrorOr<uint64_t>
+ELFObjectFile<ELFT>::getRelocationAddress(DataRefImpl Rel) const {
   uint64_t ROffset = getROffset(Rel);
   const Elf_Ehdr *Header = EF.getHeader();
 
   if (Header->e_type == ELF::ET_REL) {
     const Elf_Shdr *RelocationSec = getRelSection(Rel);
-    const Elf_Shdr *RelocatedSec = EF.getSection(RelocationSec->sh_info);
-    Result = ROffset + RelocatedSec->sh_addr;
-  } else {
-    Result = ROffset;
+    ErrorOr<const Elf_Shdr *> RelocatedSec =
+        EF.getSection(RelocationSec->sh_info);
+    if (std::error_code EC = RelocatedSec.getError())
+      return EC;
+    return ROffset + (*RelocatedSec)->sh_addr;
   }
-
-  return std::error_code();
+  return ROffset;
 }
 
 template <class ELFT>
-std::error_code
-ELFObjectFile<ELFT>::getRelocationOffset(DataRefImpl Rel,
-                                         uint64_t &Result) const {
+uint64_t ELFObjectFile<ELFT>::getRelocationOffset(DataRefImpl Rel) const {
   assert(EF.getHeader()->e_type == ELF::ET_REL &&
          "Only relocatable object files have relocation offsets");
-  Result = getROffset(Rel);
-  return std::error_code();
+  return getROffset(Rel);
 }
 
 template <class ELFT>
 uint64_t ELFObjectFile<ELFT>::getROffset(DataRefImpl Rel) const {
   const Elf_Shdr *sec = getRelSection(Rel);
-  switch (sec->sh_type) {
-  default:
-    report_fatal_error("Invalid section type in Rel!");
-  case ELF::SHT_REL:
+  if (sec->sh_type == ELF::SHT_REL)
     return getRel(Rel)->r_offset;
-  case ELF::SHT_RELA:
-    return getRela(Rel)->r_offset;
-  }
+
+  return getRela(Rel)->r_offset;
 }
 
 template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel,
-                                                       uint64_t &Result) const {
+uint64_t ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel) const {
   const Elf_Shdr *sec = getRelSection(Rel);
-  switch (sec->sh_type) {
-  default:
-    report_fatal_error("Invalid section type in Rel!");
-  case ELF::SHT_REL: {
-    Result = getRel(Rel)->getType(EF.isMips64EL());
-    break;
-  }
-  case ELF::SHT_RELA: {
-    Result = getRela(Rel)->getType(EF.isMips64EL());
-    break;
-  }
-  }
-  return std::error_code();
+  if (sec->sh_type == ELF::SHT_REL)
+    return getRel(Rel)->getType(EF.isMips64EL());
+  else
+    return getRela(Rel)->getType(EF.isMips64EL());
 }
 
 template <class ELFT>
@@ -634,25 +736,10 @@ StringRef ELFObjectFile<ELFT>::getRelocationTypeName(uint32_t Type) const {
 }
 
 template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getRelocationTypeName(
+void ELFObjectFile<ELFT>::getRelocationTypeName(
     DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
-  const Elf_Shdr *sec = getRelSection(Rel);
-  uint32_t type;
-  switch (sec->sh_type) {
-  default:
-    return object_error::parse_failed;
-  case ELF::SHT_REL: {
-    type = getRel(Rel)->getType(EF.isMips64EL());
-    break;
-  }
-  case ELF::SHT_RELA: {
-    type = getRela(Rel)->getType(EF.isMips64EL());
-    break;
-  }
-  }
-
+  uint32_t type = getRelocationType(Rel);
   EF.getRelocationTypeName(type, Result);
-  return std::error_code();
 }
 
 template <class ELFT>
@@ -664,11 +751,6 @@ ELFObjectFile<ELFT>::getRelocationAddend(DataRefImpl Rel) const {
 }
 
 template <class ELFT>
-bool ELFObjectFile<ELFT>::hasRelocationAddend(DataRefImpl Rel) const {
-  return getRelSection(Rel)->sh_type == ELF::SHT_RELA;
-}
-
-template <class ELFT>
 const typename ELFFile<ELFT>::Elf_Sym *
 ELFObjectFile<ELFT>::getSymbol(DataRefImpl Symb) const {
   return &*toELFSymIter(Symb);
@@ -677,13 +759,15 @@ ELFObjectFile<ELFT>::getSymbol(DataRefImpl Symb) const {
 template <class ELFT>
 const typename ELFObjectFile<ELFT>::Elf_Rel *
 ELFObjectFile<ELFT>::getRel(DataRefImpl Rel) const {
-  return EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b);
+  assert(getRelSection(Rel)->sh_type == ELF::SHT_REL);
+  return EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b >> 1);
 }
 
 template <class ELFT>
 const typename ELFObjectFile<ELFT>::Elf_Rela *
 ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const {
-  return EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b);
+  assert(getRelSection(Rela)->sh_type == ELF::SHT_RELA);
+  return EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b >> 1);
 }
 
 template <class ELFT>
@@ -697,38 +781,46 @@ ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC)
 
 template <class ELFT>
 basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin_impl() const {
-  return basic_symbol_iterator(SymbolRef(toDRI(EF.begin_symbols()), this));
+  DataRefImpl Sym = toDRI(EF.getDotSymtabSec(), 0);
+  return basic_symbol_iterator(SymbolRef(Sym, this));
 }
 
 template <class ELFT>
 basic_symbol_iterator ELFObjectFile<ELFT>::symbol_end_impl() const {
-  return basic_symbol_iterator(SymbolRef(toDRI(EF.end_symbols()), this));
+  const Elf_Shdr *SymTab = EF.getDotSymtabSec();
+  if (!SymTab)
+    return symbol_begin_impl();
+  DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym));
+  return basic_symbol_iterator(SymbolRef(Sym, this));
 }
 
 template <class ELFT>
-symbol_iterator ELFObjectFile<ELFT>::dynamic_symbol_begin() const {
-  return symbol_iterator(SymbolRef(toDRI(EF.begin_dynamic_symbols()), this));
+elf_symbol_iterator ELFObjectFile<ELFT>::dynamic_symbol_begin() const {
+  DataRefImpl Sym = toDRI(EF.getDotDynSymSec(), 0);
+  return symbol_iterator(SymbolRef(Sym, this));
 }
 
 template <class ELFT>
-symbol_iterator ELFObjectFile<ELFT>::dynamic_symbol_end() const {
-  return symbol_iterator(SymbolRef(toDRI(EF.end_dynamic_symbols()), this));
+elf_symbol_iterator ELFObjectFile<ELFT>::dynamic_symbol_end() const {
+  const Elf_Shdr *SymTab = EF.getDotDynSymSec();
+  DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym));
+  return basic_symbol_iterator(SymbolRef(Sym, this));
 }
 
 template <class ELFT>
 section_iterator ELFObjectFile<ELFT>::section_begin() const {
-  return section_iterator(SectionRef(toDRI(EF.begin_sections()), this));
+  return section_iterator(SectionRef(toDRI(EF.section_begin()), this));
 }
 
 template <class ELFT>
 section_iterator ELFObjectFile<ELFT>::section_end() const {
-  return section_iterator(SectionRef(toDRI(EF.end_sections()), this));
+  return section_iterator(SectionRef(toDRI(EF.section_end()), this));
 }
 
 template <class ELFT>
 StringRef ELFObjectFile<ELFT>::getLoadName() const {
-  Elf_Dyn_Iter DI = EF.begin_dynamic_table();
-  Elf_Dyn_Iter DE = EF.end_dynamic_table();
+  Elf_Dyn_Iter DI = EF.dynamic_table_begin();
+  Elf_Dyn_Iter DE = EF.dynamic_table_end();
 
   while (DI != DE && DI->getTag() != ELF::DT_SONAME)
     ++DI;
@@ -834,21 +926,16 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
 }
 
 template <class ELFT>
-std::pair<symbol_iterator, symbol_iterator>
-ELFObjectFile<ELFT>::getELFDynamicSymbolIterators() const {
-  return std::make_pair(dynamic_symbol_begin(), dynamic_symbol_end());
+ELFObjectFileBase::elf_symbol_iterator_range
+ELFObjectFile<ELFT>::getDynamicSymbolIterators() const {
+  return make_range(dynamic_symbol_begin(), dynamic_symbol_end());
 }
 
 template <class ELFT> bool ELFObjectFile<ELFT>::isRelocatableObject() const {
   return EF.getHeader()->e_type == ELF::ET_REL;
 }
 
-inline std::pair<symbol_iterator, symbol_iterator>
-getELFDynamicSymbolIterators(const SymbolicFile *Obj) {
-  return cast<ELFObjectFileBase>(Obj)->getELFDynamicSymbolIterators();
 }
-
-} // namespace object
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h
index 2eda0c1..63e1390 100644
--- a/include/llvm/Object/ELFTypes.h
+++ b/include/llvm/Object/ELFTypes.h
@@ -10,9 +10,11 @@
 #ifndef LLVM_OBJECT_ELFTYPES_H
 #define LLVM_OBJECT_ELFTYPES_H
 
+#include "llvm/Object/Error.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorOr.h"
 
 namespace llvm {
 namespace object {
@@ -24,6 +26,11 @@ template <endianness target_endianness, bool is64Bits> struct ELFType {
   static const bool Is64Bits = is64Bits;
 };
 
+typedef ELFType<support::little, false> ELF32LE;
+typedef ELFType<support::big, false> ELF32BE;
+typedef ELFType<support::little, true> ELF64LE;
+typedef ELFType<support::big, true> ELF64BE;
+
 // Use an alignment of 2 for the typedefs since that is the worst case for
 // ELF files in archives.
 
@@ -197,8 +204,21 @@ struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
     return st_shndx >= ELF::SHN_LORESERVE;
   }
   bool isUndefined() const { return st_shndx == ELF::SHN_UNDEF; }
+  bool isExternal() const {
+    return getBinding() != ELF::STB_LOCAL;
+  }
+
+  ErrorOr<StringRef> getName(StringRef StrTab) const;
 };
 
+template <class ELFT>
+ErrorOr<StringRef> Elf_Sym_Impl<ELFT>::getName(StringRef StrTab) const {
+  uint32_t Offset = this->st_name;
+  if (Offset >= StrTab.size())
+    return object_error::parse_failed;
+  return StringRef(StrTab.data() + Offset);
+}
+
 /// Elf_Versym: This is the structure of entries in the SHT_GNU_versym section
 /// (.gnu.version). This structure is identical for ELF32 and ELF64.
 template <class ELFT>
@@ -293,14 +313,14 @@ struct Elf_Dyn_Impl : Elf_Dyn_Base<ELFT> {
   using Elf_Dyn_Base<ELFT>::d_un;
   int64_t getTag() const { return d_tag; }
   uint64_t getVal() const { return d_un.d_val; }
-  uint64_t getPtr() const { return d_un.ptr; }
+  uint64_t getPtr() const { return d_un.d_ptr; }
 };
 
 // Elf_Rel: Elf Relocation
-template <class ELFT, bool isRela> struct Elf_Rel_Base;
+template <class ELFT, bool isRela> struct Elf_Rel_Impl;
 
 template <endianness TargetEndianness>
-struct Elf_Rel_Base<ELFType<TargetEndianness, false>, false> {
+struct Elf_Rel_Impl<ELFType<TargetEndianness, false>, false> {
   LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
   Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Word r_info;   // Symbol table index and type of relocation to apply
@@ -313,64 +333,46 @@ struct Elf_Rel_Base<ELFType<TargetEndianness, false>, false> {
     assert(!IsMips64EL);
     r_info = R;
   }
-};
-
-template <endianness TargetEndianness>
-struct Elf_Rel_Base<ELFType<TargetEndianness, true>, false> {
-  LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
-  Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
-  Elf_Xword r_info;  // Symbol table index and type of relocation to apply
 
-  uint64_t getRInfo(bool isMips64EL) const {
-    uint64_t t = r_info;
-    if (!isMips64EL)
-      return t;
-    // Mips64 little endian has a "special" encoding of r_info. Instead of one
-    // 64 bit little endian number, it is a little endian 32 bit number followed
-    // by a 32 bit big endian number.
-    return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) |
-           ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff);
+  // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
+  // and ELF32_R_INFO macros defined in the ELF specification:
+  uint32_t getSymbol(bool isMips64EL) const {
+    return this->getRInfo(isMips64EL) >> 8;
   }
-  void setRInfo(uint64_t R, bool IsMips64EL) {
-    if (IsMips64EL)
-      r_info = (R >> 32) | ((R & 0xff000000) << 8) | ((R & 0x00ff0000) << 24) |
-               ((R & 0x0000ff00) << 40) | ((R & 0x000000ff) << 56);
-    else
-      r_info = R;
+  unsigned char getType(bool isMips64EL) const {
+    return (unsigned char)(this->getRInfo(isMips64EL) & 0x0ff);
+  }
+  void setSymbol(uint32_t s, bool IsMips64EL) {
+    setSymbolAndType(s, getType(), IsMips64EL);
+  }
+  void setType(unsigned char t, bool IsMips64EL) {
+    setSymbolAndType(getSymbol(), t, IsMips64EL);
+  }
+  void setSymbolAndType(uint32_t s, unsigned char t, bool IsMips64EL) {
+    this->setRInfo((s << 8) + t, IsMips64EL);
   }
 };
 
 template <endianness TargetEndianness>
-struct Elf_Rel_Base<ELFType<TargetEndianness, false>, true> {
+struct Elf_Rel_Impl<ELFType<TargetEndianness, false>, true>
+    : public Elf_Rel_Impl<ELFType<TargetEndianness, false>, false> {
   LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
-  Elf_Addr r_offset;  // Location (file byte offset, or program virtual addr)
-  Elf_Word r_info;    // Symbol table index and type of relocation to apply
   Elf_Sword r_addend; // Compute value for relocatable field by adding this
-
-  uint32_t getRInfo(bool isMips64EL) const {
-    assert(!isMips64EL);
-    return r_info;
-  }
-  void setRInfo(uint32_t R, bool IsMips64EL) {
-    assert(!IsMips64EL);
-    r_info = R;
-  }
 };
 
 template <endianness TargetEndianness>
-struct Elf_Rel_Base<ELFType<TargetEndianness, true>, true> {
+struct Elf_Rel_Impl<ELFType<TargetEndianness, true>, false> {
   LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
-  Elf_Addr r_offset;   // Location (file byte offset, or program virtual addr)
-  Elf_Xword r_info;    // Symbol table index and type of relocation to apply
-  Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
+  Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+  Elf_Xword r_info;  // Symbol table index and type of relocation to apply
 
   uint64_t getRInfo(bool isMips64EL) const {
-    // Mips64 little endian has a "special" encoding of r_info. Instead of one
-    // 64 bit little endian number, it is a little endian 32 bit number followed
-    // by a 32 bit big endian number.
     uint64_t t = r_info;
     if (!isMips64EL)
       return t;
+    // Mips64 little endian has a "special" encoding of r_info. Instead of one
+    // 64 bit little endian number, it is a little endian 32 bit number followed
+    // by a 32 bit big endian number.
     return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) |
            ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff);
   }
@@ -381,14 +383,6 @@ struct Elf_Rel_Base<ELFType<TargetEndianness, true>, true> {
     else
       r_info = R;
   }
-};
-
-template <class ELFT, bool isRela> struct Elf_Rel_Impl;
-
-template <endianness TargetEndianness, bool isRela>
-struct Elf_Rel_Impl<ELFType<TargetEndianness, true>, isRela>
-    : Elf_Rel_Base<ELFType<TargetEndianness, true>, isRela> {
-  LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
 
   // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
   // and ELF64_R_INFO macros defined in the ELF specification:
@@ -409,28 +403,11 @@ struct Elf_Rel_Impl<ELFType<TargetEndianness, true>, isRela>
   }
 };
 
-template <endianness TargetEndianness, bool isRela>
-struct Elf_Rel_Impl<ELFType<TargetEndianness, false>, isRela>
-    : Elf_Rel_Base<ELFType<TargetEndianness, false>, isRela> {
-  LLVM_ELF_IMPORT_TYPES(TargetEndianness, false)
-
-  // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
-  // and ELF32_R_INFO macros defined in the ELF specification:
-  uint32_t getSymbol(bool isMips64EL) const {
-    return this->getRInfo(isMips64EL) >> 8;
-  }
-  unsigned char getType(bool isMips64EL) const {
-    return (unsigned char)(this->getRInfo(isMips64EL) & 0x0ff);
-  }
-  void setSymbol(uint32_t s, bool IsMips64EL) {
-    setSymbolAndType(s, getType(), IsMips64EL);
-  }
-  void setType(unsigned char t, bool IsMips64EL) {
-    setSymbolAndType(getSymbol(), t, IsMips64EL);
-  }
-  void setSymbolAndType(uint32_t s, unsigned char t, bool IsMips64EL) {
-    this->setRInfo((s << 8) + t, IsMips64EL);
-  }
+template <endianness TargetEndianness>
+struct Elf_Rel_Impl<ELFType<TargetEndianness, true>, true>
+    : public Elf_Rel_Impl<ELFType<TargetEndianness, true>, false> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, true)
+  Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
 };
 
 template <class ELFT>
diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h
index c9db1b8..aa320bb 100644
--- a/include/llvm/Object/Error.h
+++ b/include/llvm/Object/Error.h
@@ -27,6 +27,8 @@ enum class object_error {
   invalid_file_type,
   parse_failed,
   unexpected_eof,
+  string_table_non_null_end,
+  invalid_section_index,
   bitcode_section_not_found,
   macho_small_load_command,
   macho_load_segment_too_many_sections,
diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h
index f713570..ef65528 100644
--- a/include/llvm/Object/IRObjectFile.h
+++ b/include/llvm/Object/IRObjectFile.h
@@ -68,7 +68,7 @@ public:
   static ErrorOr<std::unique_ptr<IRObjectFile>> create(MemoryBufferRef Object,
                                                        LLVMContext &Context);
 };
-} // namespace object
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index 4350a75..f4edfd0 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -197,8 +197,9 @@ public:
                   std::error_code &EC);
 
   void moveSymbolNext(DataRefImpl &Symb) const override;
-  std::error_code getSymbolName(DataRefImpl Symb,
-                                StringRef &Res) const override;
+
+  uint64_t getNValue(DataRefImpl Sym) const;
+  ErrorOr<StringRef> getSymbolName(DataRefImpl Symb) const override;
 
   // MachO specific.
   std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const;
@@ -206,13 +207,15 @@ public:
 
   std::error_code getSymbolAddress(DataRefImpl Symb,
                                    uint64_t &Res) const override;
+  uint64_t getSymbolValue(DataRefImpl Symb) const override;
   uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
-  uint64_t getSymbolSize(DataRefImpl Symb) const override;
-  std::error_code getSymbolType(DataRefImpl Symb,
-                                SymbolRef::Type &Res) const override;
+  uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
+  SymbolRef::Type getSymbolType(DataRefImpl Symb) const override;
   uint32_t getSymbolFlags(DataRefImpl Symb) const override;
   std::error_code getSymbolSection(DataRefImpl Symb,
                                    section_iterator &Res) const override;
+  unsigned getSymbolSectionID(SymbolRef Symb) const;
+  unsigned getSectionID(SectionRef Sec) const;
 
   void moveSectionNext(DataRefImpl &Sec) const override;
   std::error_code getSectionName(DataRefImpl Sec,
@@ -226,24 +229,17 @@ public:
   bool isSectionData(DataRefImpl Sec) const override;
   bool isSectionBSS(DataRefImpl Sec) const override;
   bool isSectionVirtual(DataRefImpl Sec) const override;
-  bool sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb) const override;
   relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
   relocation_iterator section_rel_end(DataRefImpl Sec) const override;
 
   void moveRelocationNext(DataRefImpl &Rel) const override;
-  std::error_code getRelocationAddress(DataRefImpl Rel,
-                                       uint64_t &Res) const override;
-  std::error_code getRelocationOffset(DataRefImpl Rel,
-                                      uint64_t &Res) const override;
+  ErrorOr<uint64_t> getRelocationAddress(DataRefImpl Rel) const override;
+  uint64_t getRelocationOffset(DataRefImpl Rel) const override;
   symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
   section_iterator getRelocationSection(DataRefImpl Rel) const;
-  std::error_code getRelocationType(DataRefImpl Rel,
-                                    uint64_t &Res) const override;
-  std::error_code
-  getRelocationTypeName(DataRefImpl Rel,
-                        SmallVectorImpl<char> &Result) const override;
-  std::error_code getRelocationHidden(DataRefImpl Rel,
-                                      bool &Result) const override;
+  uint64_t getRelocationType(DataRefImpl Rel) const override;
+  void getRelocationTypeName(DataRefImpl Rel,
+                             SmallVectorImpl<char> &Result) const override;
   uint8_t getRelocationLength(DataRefImpl Rel) const;
 
   // MachO specific.
@@ -503,8 +499,8 @@ inline const ObjectFile *DiceRef::getObjectFile() const {
   return OwningObject;
 }
 
-} // namespace object
-} // namespace llvm
+}
+}
 
 #endif
 
diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h
index ebc8b90..a11d381 100644
--- a/include/llvm/Object/MachOUniversal.h
+++ b/include/llvm/Object/MachOUniversal.h
@@ -109,10 +109,10 @@ public:
   }
 
   ErrorOr<std::unique_ptr<MachOObjectFile>>
-  getObjectForArch(Triple::ArchType Arch) const;
+  getObjectForArch(StringRef ArchName) const;
 };
 
-} // namespace object
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index e00fe0e..62eab10 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -35,8 +35,8 @@ class symbol_iterator;
 class SectionRef;
 typedef content_iterator<SectionRef> section_iterator;
 
-/// RelocationRef - This is a value type class that represents a single
-/// relocation in the list of relocations in the object file.
+/// This is a value type class that represents a single relocation in the list
+/// of relocations in the object file.
 class RelocationRef {
   DataRefImpl RelocationPimpl;
   const ObjectFile *OwningObject;
@@ -50,29 +50,23 @@ public:
 
   void moveNext();
 
-  std::error_code getAddress(uint64_t &Result) const;
-  std::error_code getOffset(uint64_t &Result) const;
+  ErrorOr<uint64_t> getAddress() const;
+  uint64_t getOffset() const;
   symbol_iterator getSymbol() const;
-  std::error_code getType(uint64_t &Result) const;
-
-  /// @brief Indicates whether this relocation should hidden when listing
-  /// relocations, usually because it is the trailing part of a multipart
-  /// relocation that will be printed as part of the leading relocation.
-  std::error_code getHidden(bool &Result) const;
+  uint64_t getType() const;
 
   /// @brief Get a string that represents the type of this relocation.
   ///
   /// This is for display purposes only.
-  std::error_code getTypeName(SmallVectorImpl<char> &Result) const;
-
+  void getTypeName(SmallVectorImpl<char> &Result) const;
 
   DataRefImpl getRawDataRefImpl() const;
-  const ObjectFile *getObjectFile() const;
+  const ObjectFile *getObject() const;
 };
 typedef content_iterator<RelocationRef> relocation_iterator;
 
-/// SectionRef - This is a value type class that represents a single section in
-/// the list of sections in the object file.
+/// This is a value type class that represents a single section in the list of
+/// sections in the object file.
 class SectionRef {
   friend class SymbolRef;
   DataRefImpl SectionPimpl;
@@ -116,8 +110,8 @@ public:
   const ObjectFile *getObject() const;
 };
 
-/// SymbolRef - This is a value type class that represents a single symbol in
-/// the list of symbols in the object file.
+/// This is a value type class that represents a single symbol in the list of
+/// symbols in the object file.
 class SymbolRef : public BasicSymbolRef {
   friend class SectionRef;
 
@@ -134,16 +128,23 @@ public:
   };
 
   SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner);
+  SymbolRef(const BasicSymbolRef &B) : BasicSymbolRef(B) {
+    assert(isa<ObjectFile>(BasicSymbolRef::getObject()));
+  }
 
-  std::error_code getName(StringRef &Result) const;
+  ErrorOr<StringRef> getName() const;
   /// Returns the symbol virtual address (i.e. address at which it will be
   /// mapped).
   std::error_code getAddress(uint64_t &Result) const;
+
+  /// Return the value of the symbol depending on the object this can be an
+  /// offset or a virtual address.
+  uint64_t getValue() const;
+
   /// @brief Get the alignment of this symbol as the actual value (not log 2).
   uint32_t getAlignment() const;
-  uint64_t getSize() const;
-  std::error_code getType(SymbolRef::Type &Result) const;
-  std::error_code getOther(uint8_t &Result) const;
+  uint64_t getCommonSize() const;
+  SymbolRef::Type getType() const;
 
   /// @brief Get section this symbol is defined in reference to. Result is
   /// end_sections() if it is undefined or is an absolute symbol.
@@ -170,9 +171,9 @@ public:
   }
 };
 
-/// ObjectFile - This class is the base class for all object file types.
-/// Concrete instances of this object are created by createObjectFile, which
-/// figures out which type to create.
+/// This class is the base class for all object file types. Concrete instances
+/// of this object are created by createObjectFile, which figures out which type
+/// to create.
 class ObjectFile : public SymbolicFile {
   virtual void anchor();
   ObjectFile() = delete;
@@ -194,22 +195,17 @@ protected:
   // Implementations assume that the DataRefImpl is valid and has not been
   // modified externally. It's UB otherwise.
   friend class SymbolRef;
-  virtual std::error_code getSymbolName(DataRefImpl Symb,
-                                        StringRef &Res) const = 0;
+  virtual ErrorOr<StringRef> getSymbolName(DataRefImpl Symb) const = 0;
   std::error_code printSymbolName(raw_ostream &OS,
                                   DataRefImpl Symb) const override;
   virtual std::error_code getSymbolAddress(DataRefImpl Symb,
                                            uint64_t &Res) const = 0;
+  virtual uint64_t getSymbolValue(DataRefImpl Symb) const = 0;
   virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const;
-  virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
-  virtual std::error_code getSymbolType(DataRefImpl Symb,
-                                        SymbolRef::Type &Res) const = 0;
+  virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0;
+  virtual SymbolRef::Type getSymbolType(DataRefImpl Symb) const = 0;
   virtual std::error_code getSymbolSection(DataRefImpl Symb,
                                            section_iterator &Res) const = 0;
-  virtual std::error_code getSymbolOther(DataRefImpl Symb,
-                                         uint8_t &Res) const {
-    return object_error::invalid_file_type;
-  }
 
   // Same as above for SectionRef.
   friend class SectionRef;
@@ -226,8 +222,6 @@ protected:
   virtual bool isSectionBSS(DataRefImpl Sec) const = 0;
   // A section is 'virtual' if its contents aren't present in the object image.
   virtual bool isSectionVirtual(DataRefImpl Sec) const = 0;
-  virtual bool sectionContainsSymbol(DataRefImpl Sec,
-                                     DataRefImpl Symb) const = 0;
   virtual relocation_iterator section_rel_begin(DataRefImpl Sec) const = 0;
   virtual relocation_iterator section_rel_end(DataRefImpl Sec) const = 0;
   virtual section_iterator getRelocatedSection(DataRefImpl Sec) const;
@@ -235,23 +229,19 @@ protected:
   // Same as above for RelocationRef.
   friend class RelocationRef;
   virtual void moveRelocationNext(DataRefImpl &Rel) const = 0;
-  virtual std::error_code getRelocationAddress(DataRefImpl Rel,
-                                               uint64_t &Res) const = 0;
-  virtual std::error_code getRelocationOffset(DataRefImpl Rel,
-                                              uint64_t &Res) const = 0;
+  virtual ErrorOr<uint64_t> getRelocationAddress(DataRefImpl Rel) const = 0;
+  virtual uint64_t getRelocationOffset(DataRefImpl Rel) const = 0;
   virtual symbol_iterator getRelocationSymbol(DataRefImpl Rel) const = 0;
-  virtual std::error_code getRelocationType(DataRefImpl Rel,
-                                            uint64_t &Res) const = 0;
-  virtual std::error_code
-  getRelocationTypeName(DataRefImpl Rel,
-                        SmallVectorImpl<char> &Result) const = 0;
-  virtual std::error_code getRelocationHidden(DataRefImpl Rel,
-                                              bool &Result) const {
-    Result = false;
-    return std::error_code();
-  }
+  virtual uint64_t getRelocationType(DataRefImpl Rel) const = 0;
+  virtual void getRelocationTypeName(DataRefImpl Rel,
+                                     SmallVectorImpl<char> &Result) const = 0;
 
 public:
+  uint64_t getCommonSymbolSize(DataRefImpl Symb) const {
+    assert(getSymbolFlags(Symb) & SymbolRef::SF_Common);
+    return getCommonSymbolSizeImpl(Symb);
+  }
+
   typedef iterator_range<symbol_iterator> symbol_iterator_range;
   symbol_iterator_range symbols() const {
     return symbol_iterator_range(symbol_begin(), symbol_end());
@@ -314,32 +304,32 @@ public:
 inline SymbolRef::SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner)
     : BasicSymbolRef(SymbolP, Owner) {}
 
-inline std::error_code SymbolRef::getName(StringRef &Result) const {
-  return getObject()->getSymbolName(getRawDataRefImpl(), Result);
+inline ErrorOr<StringRef> SymbolRef::getName() const {
+  return getObject()->getSymbolName(getRawDataRefImpl());
 }
 
 inline std::error_code SymbolRef::getAddress(uint64_t &Result) const {
   return getObject()->getSymbolAddress(getRawDataRefImpl(), Result);
 }
 
+inline uint64_t SymbolRef::getValue() const {
+  return getObject()->getSymbolValue(getRawDataRefImpl());
+}
+
 inline uint32_t SymbolRef::getAlignment() const {
   return getObject()->getSymbolAlignment(getRawDataRefImpl());
 }
 
-inline uint64_t SymbolRef::getSize() const {
-  return getObject()->getSymbolSize(getRawDataRefImpl());
+inline uint64_t SymbolRef::getCommonSize() const {
+  return getObject()->getCommonSymbolSize(getRawDataRefImpl());
 }
 
 inline std::error_code SymbolRef::getSection(section_iterator &Result) const {
   return getObject()->getSymbolSection(getRawDataRefImpl(), Result);
 }
 
-inline std::error_code SymbolRef::getType(SymbolRef::Type &Result) const {
-  return getObject()->getSymbolType(getRawDataRefImpl(), Result);
-}
-
-inline std::error_code SymbolRef::getOther(uint8_t &Result) const {
-  return getObject()->getSymbolOther(getRawDataRefImpl(), Result);
+inline SymbolRef::Type SymbolRef::getType() const {
+  return getObject()->getSymbolType(getRawDataRefImpl());
 }
 
 inline const ObjectFile *SymbolRef::getObject() const {
@@ -406,11 +396,6 @@ inline bool SectionRef::isVirtual() const {
   return OwningObject->isSectionVirtual(SectionPimpl);
 }
 
-inline bool SectionRef::containsSymbol(SymbolRef S) const {
-  return OwningObject->sectionContainsSymbol(SectionPimpl,
-                                             S.getRawDataRefImpl());
-}
-
 inline relocation_iterator SectionRef::relocation_begin() const {
   return OwningObject->section_rel_begin(SectionPimpl);
 }
@@ -445,36 +430,31 @@ inline void RelocationRef::moveNext() {
   return OwningObject->moveRelocationNext(RelocationPimpl);
 }
 
-inline std::error_code RelocationRef::getAddress(uint64_t &Result) const {
-  return OwningObject->getRelocationAddress(RelocationPimpl, Result);
+inline ErrorOr<uint64_t> RelocationRef::getAddress() const {
+  return OwningObject->getRelocationAddress(RelocationPimpl);
 }
 
-inline std::error_code RelocationRef::getOffset(uint64_t &Result) const {
-  return OwningObject->getRelocationOffset(RelocationPimpl, Result);
+inline uint64_t RelocationRef::getOffset() const {
+  return OwningObject->getRelocationOffset(RelocationPimpl);
 }
 
 inline symbol_iterator RelocationRef::getSymbol() const {
   return OwningObject->getRelocationSymbol(RelocationPimpl);
 }
 
-inline std::error_code RelocationRef::getType(uint64_t &Result) const {
-  return OwningObject->getRelocationType(RelocationPimpl, Result);
+inline uint64_t RelocationRef::getType() const {
+  return OwningObject->getRelocationType(RelocationPimpl);
 }
 
-inline std::error_code
-RelocationRef::getTypeName(SmallVectorImpl<char> &Result) const {
+inline void RelocationRef::getTypeName(SmallVectorImpl<char> &Result) const {
   return OwningObject->getRelocationTypeName(RelocationPimpl, Result);
 }
 
-inline std::error_code RelocationRef::getHidden(bool &Result) const {
-  return OwningObject->getRelocationHidden(RelocationPimpl, Result);
-}
-
 inline DataRefImpl RelocationRef::getRawDataRefImpl() const {
   return RelocationPimpl;
 }
 
-inline const ObjectFile *RelocationRef::getObjectFile() const {
+inline const ObjectFile *RelocationRef::getObject() const {
   return OwningObject;
 }
 
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
index f80ee0a..950e2ed 100644
--- a/include/llvm/Object/RelocVisitor.h
+++ b/include/llvm/Object/RelocVisitor.h
@@ -240,16 +240,14 @@ private:
   }
 
   int64_t getELFAddend(RelocationRef R) {
-    const auto *Obj = cast<ELFObjectFileBase>(R.getObjectFile());
-    DataRefImpl DRI = R.getRawDataRefImpl();
-    ErrorOr<int64_t> AddendOrErr = Obj->getRelocationAddend(DRI);
+    ErrorOr<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend();
     if (std::error_code EC = AddendOrErr.getError())
       report_fatal_error(EC.message());
     return *AddendOrErr;
   }
 
   uint8_t getLengthMachO64(RelocationRef R) {
-    const MachOObjectFile *Obj = cast<MachOObjectFile>(R.getObjectFile());
+    const MachOObjectFile *Obj = cast<MachOObjectFile>(R.getObject());
     return Obj->getRelocationLength(R.getRawDataRefImpl());
   }
 
@@ -267,8 +265,7 @@ private:
   }
 
   RelocToApply visitELF_386_PC32(RelocationRef R, uint64_t Value) {
-    uint64_t Address;
-    R.getOffset(Address);
+    uint64_t Address = R.getOffset();
     return RelocToApply(Value - Address, 4);
   }
 
@@ -282,8 +279,7 @@ private:
   }
   RelocToApply visitELF_X86_64_PC32(RelocationRef R, uint64_t Value) {
     int64_t Addend = getELFAddend(R);
-    uint64_t Address;
-    R.getOffset(Address);
+    uint64_t Address = R.getOffset();
     return RelocToApply(Value + Addend - Address, 4);
   }
   RelocToApply visitELF_X86_64_32(RelocationRef R, uint64_t Value) {
@@ -412,6 +408,6 @@ private:
   }
 };
 
-} // namespace object
-} // namespace llvm
+}
+}
 #endif
diff --git a/include/llvm/Object/StackMapParser.h b/include/llvm/Object/StackMapParser.h
new file mode 100644
index 0000000..276eab6
--- /dev/null
+++ b/include/llvm/Object/StackMapParser.h
@@ -0,0 +1,442 @@
+//===-------- StackMapParser.h - StackMap Parsing Support -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_STACKMAPPARSER_H
+#define LLVM_CODEGEN_STACKMAPPARSER_H
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+template <support::endianness Endianness>
+class StackMapV1Parser {
+public:
+
+  template <typename AccessorT>
+  class AccessorIterator {
+  public:
+
+    AccessorIterator(AccessorT A) : A(A) {}
+    AccessorIterator& operator++() { A = A.next(); return *this; }
+    AccessorIterator operator++(int) {
+      auto tmp = *this;
+      ++*this;
+      return tmp;
+    }
+
+    bool operator==(const AccessorIterator &Other) {
+      return A.P == Other.A.P;
+    }
+
+    bool operator!=(const AccessorIterator &Other) { return !(*this == Other); }
+
+    AccessorT& operator*() { return A; }
+    AccessorT* operator->() { return &A; }
+
+  private:
+    AccessorT A;
+  };
+
+  /// Accessor for function records.
+  class FunctionAccessor {
+    friend class StackMapV1Parser;
+  public:
+
+    /// Get the function address.
+    uint64_t getFunctionAddress() const {
+      return read<uint64_t>(P);
+    }
+
+    /// Get the function's stack size.
+    uint32_t getStackSize() const {
+      return read<uint64_t>(P + sizeof(uint64_t));
+    }
+
+  private:
+    FunctionAccessor(const uint8_t *P) : P(P) {}
+
+    const static int FunctionAccessorSize = 2 * sizeof(uint64_t);
+
+    FunctionAccessor next() const {
+      return FunctionAccessor(P + FunctionAccessorSize);
+    }
+
+    const uint8_t *P;
+  };
+
+  /// Accessor for constants.
+  class ConstantAccessor {
+    friend class StackMapV1Parser;
+  public:
+
+    /// Return the value of this constant.
+    uint64_t getValue() const { return read<uint64_t>(P); }
+
+  private:
+
+    ConstantAccessor(const uint8_t *P) : P(P) {}
+
+    const static int ConstantAccessorSize = sizeof(uint64_t);
+
+    ConstantAccessor next() const {
+      return ConstantAccessor(P + ConstantAccessorSize);
+    }
+
+    const uint8_t *P;
+  };
+
+  // Forward-declare RecordAccessor so we can friend it below.
+  class RecordAccessor;
+
+  enum class LocationKind : uint8_t {
+    Register = 1, Direct = 2, Indirect = 3, Constant = 4, ConstantIndex = 5
+  };
+
+
+  /// Accessor for location records.
+  class LocationAccessor {
+    friend class StackMapV1Parser;
+    friend class RecordAccessor;
+  public:
+
+    /// Get the Kind for this location.
+    LocationKind getKind() const {
+      return LocationKind(P[KindOffset]);
+    }
+
+    /// Get the Dwarf register number for this location.
+    uint16_t getDwarfRegNum() const {
+      return read<uint16_t>(P + DwarfRegNumOffset);
+    }
+
+    /// Get the small-constant for this location. (Kind must be Constant).
+    uint32_t getSmallConstant() const {
+      assert(getKind() == LocationKind::Constant && "Not a small constant.");
+      return read<uint32_t>(P + SmallConstantOffset);
+    }
+
+    /// Get the constant-index for this location. (Kind must be ConstantIndex).
+    uint32_t getConstantIndex() const {
+      assert(getKind() == LocationKind::ConstantIndex &&
+             "Not a constant-index.");
+      return read<uint32_t>(P + SmallConstantOffset);
+    }
+
+    /// Get the offset for this location. (Kind must be Direct or Indirect).
+    int32_t getOffset() const {
+      assert((getKind() == LocationKind::Direct ||
+              getKind() == LocationKind::Indirect) &&
+             "Not direct or indirect.");
+      return read<int32_t>(P + SmallConstantOffset);
+    }
+
+  private:
+
+    LocationAccessor(const uint8_t *P) : P(P) {}
+
+    LocationAccessor next() const {
+      return LocationAccessor(P + LocationAccessorSize);
+    }
+
+    static const int KindOffset = 0;
+    static const int DwarfRegNumOffset = KindOffset + sizeof(uint16_t);
+    static const int SmallConstantOffset = DwarfRegNumOffset + sizeof(uint16_t);
+    static const int LocationAccessorSize = sizeof(uint64_t);
+
+    const uint8_t *P;
+  };
+
+  /// Accessor for stackmap live-out fields.
+  class LiveOutAccessor {
+    friend class StackMapV1Parser;
+    friend class RecordAccessor;
+  public:
+
+    /// Get the Dwarf register number for this live-out.
+    uint16_t getDwarfRegNum() const {
+      return read<uint16_t>(P + DwarfRegNumOffset);
+    }
+
+    /// Get the size in bytes of live [sub]register.
+    unsigned getSizeInBytes() const {
+      return read<uint8_t>(P + SizeOffset);
+    }
+
+  private:
+
+    LiveOutAccessor(const uint8_t *P) : P(P) {}
+
+    LiveOutAccessor next() const {
+      return LiveOutAccessor(P + LiveOutAccessorSize);
+    }
+
+    static const int DwarfRegNumOffset = 0;
+    static const int SizeOffset =
+      DwarfRegNumOffset + sizeof(uint16_t) + sizeof(uint8_t);
+    static const int LiveOutAccessorSize = sizeof(uint32_t);
+
+    const uint8_t *P;
+  };
+
+  /// Accessor for stackmap records.
+  class RecordAccessor {
+    friend class StackMapV1Parser;
+  public:
+
+    typedef AccessorIterator<LocationAccessor> location_iterator;
+    typedef AccessorIterator<LiveOutAccessor> liveout_iterator;
+
+    /// Get the patchpoint/stackmap ID for this record.
+    uint64_t getID() const {
+      return read<uint64_t>(P + PatchpointIDOffset);
+    }
+
+    /// Get the instruction offset (from the start of the containing function)
+    /// for this record.
+    uint32_t getInstructionOffset() const {
+      return read<uint32_t>(P + InstructionOffsetOffset);
+    }
+
+    /// Get the number of locations contained in this record.
+    uint16_t getNumLocations() const {
+      return read<uint16_t>(P + NumLocationsOffset);
+    }
+
+    /// Get the location with the given index.
+    LocationAccessor getLocation(unsigned LocationIndex) const {
+      unsigned LocationOffset =
+        LocationListOffset + LocationIndex * LocationSize;
+      return LocationAccessor(P + LocationOffset);
+    }
+
+    /// Begin iterator for locations.
+    location_iterator location_begin() const {
+      return location_iterator(getLocation(0));
+    }
+
+    /// End iterator for locations.
+    location_iterator location_end() const {
+      return location_iterator(getLocation(getNumLocations()));
+    }
+
+    /// Iterator range for locations.
+    iterator_range<location_iterator> locations() const {
+      return make_range(location_begin(), location_end());
+    }
+
+    /// Get the number of liveouts contained in this record.
+    uint16_t getNumLiveOuts() const {
+      return read<uint16_t>(P + getNumLiveOutsOffset());
+    }
+
+    /// Get the live-out with the given index.
+    LiveOutAccessor getLiveOut(unsigned LiveOutIndex) const {
+      unsigned LiveOutOffset =
+        getNumLiveOutsOffset() + sizeof(uint16_t) + LiveOutIndex * LiveOutSize;
+      return LiveOutAccessor(P + LiveOutOffset);
+    }
+
+    /// Begin iterator for live-outs.
+    liveout_iterator liveouts_begin() const {
+      return liveout_iterator(getLiveOut(0));
+    }
+
+
+    /// End iterator for live-outs.
+    liveout_iterator liveouts_end() const {
+      return liveout_iterator(getLiveOut(getNumLiveOuts()));
+    }
+
+    /// Iterator range for live-outs.
+    iterator_range<liveout_iterator> liveouts() const {
+      return make_range(liveouts_begin(), liveouts_end());
+    }
+
+  private:
+
+    RecordAccessor(const uint8_t *P) : P(P) {}
+
+    unsigned getNumLiveOutsOffset() const {
+      return LocationListOffset + LocationSize * getNumLocations() +
+             sizeof(uint16_t);
+    }
+
+    unsigned getSizeInBytes() const {
+      unsigned RecordSize =
+        getNumLiveOutsOffset() + sizeof(uint16_t) + getNumLiveOuts() * LiveOutSize;
+      return (RecordSize + 7) & ~0x7;
+    }
+
+    RecordAccessor next() const {
+      return RecordAccessor(P + getSizeInBytes());
+    }
+
+    static const unsigned PatchpointIDOffset = 0;
+    static const unsigned InstructionOffsetOffset =
+      PatchpointIDOffset + sizeof(uint64_t);
+    static const unsigned NumLocationsOffset =
+      InstructionOffsetOffset + sizeof(uint32_t) + sizeof(uint16_t);
+    static const unsigned LocationListOffset =
+      NumLocationsOffset + sizeof(uint16_t);
+    static const unsigned LocationSize = sizeof(uint64_t);
+    static const unsigned LiveOutSize = sizeof(uint32_t);
+
+    const uint8_t *P;
+  };
+
+  /// Construct a parser for a version-1 stackmap. StackMap data will be read
+  /// from the given array.
+  StackMapV1Parser(ArrayRef<uint8_t> StackMapSection)
+      : StackMapSection(StackMapSection) {
+    ConstantsListOffset = FunctionListOffset + getNumFunctions() * FunctionSize;
+
+    assert(StackMapSection[0] == 1 &&
+           "StackMapV1Parser can only parse version 1 stackmaps");
+
+    unsigned CurrentRecordOffset =
+      ConstantsListOffset + getNumConstants() * ConstantSize;
+
+    for (unsigned I = 0, E = getNumRecords(); I != E; ++I) {
+      StackMapRecordOffsets.push_back(CurrentRecordOffset);
+      CurrentRecordOffset +=
+        RecordAccessor(&StackMapSection[CurrentRecordOffset]).getSizeInBytes();
+    }
+  }
+
+  typedef AccessorIterator<FunctionAccessor> function_iterator;
+  typedef AccessorIterator<ConstantAccessor> constant_iterator;
+  typedef AccessorIterator<RecordAccessor> record_iterator;
+
+  /// Get the version number of this stackmap. (Always returns 1).
+  unsigned getVersion() const { return 1; }
+
+  /// Get the number of functions in the stack map.
+  uint32_t getNumFunctions() const {
+    return read<uint32_t>(&StackMapSection[NumFunctionsOffset]);
+  }
+
+  /// Get the number of large constants in the stack map.
+  uint32_t getNumConstants() const {
+    return read<uint32_t>(&StackMapSection[NumConstantsOffset]);
+  }
+
+  /// Get the number of stackmap records in the stackmap.
+  uint32_t getNumRecords() const {
+    return read<uint32_t>(&StackMapSection[NumRecordsOffset]);
+  }
+
+  /// Return an FunctionAccessor for the given function index.
+  FunctionAccessor getFunction(unsigned FunctionIndex) const {
+    return FunctionAccessor(StackMapSection.data() +
+                            getFunctionOffset(FunctionIndex));
+  }
+
+  /// Begin iterator for functions.
+  function_iterator functions_begin() const {
+    return function_iterator(getFunction(0));
+  }
+
+  /// End iterator for functions.
+  function_iterator functions_end() const {
+    return function_iterator(
+             FunctionAccessor(StackMapSection.data() +
+                              getFunctionOffset(getNumFunctions())));
+  }
+
+  /// Iterator range for functions.
+  iterator_range<function_iterator> functions() const {
+    return make_range(functions_begin(), functions_end());
+  }
+
+  /// Return the large constant at the given index.
+  ConstantAccessor getConstant(unsigned ConstantIndex) const {
+    return ConstantAccessor(StackMapSection.data() +
+                            getConstantOffset(ConstantIndex));
+  }
+
+  /// Begin iterator for constants.
+  constant_iterator constants_begin() const {
+    return constant_iterator(getConstant(0));
+  }
+
+  /// End iterator for constants.
+  constant_iterator constants_end() const {
+    return constant_iterator(
+             ConstantAccessor(StackMapSection.data() +
+                              getConstantOffset(getNumConstants())));
+  }
+
+  /// Iterator range for constants.
+  iterator_range<constant_iterator> constants() const {
+    return make_range(constants_begin(), constants_end());
+  }
+
+  /// Return a RecordAccessor for the given record index.
+  RecordAccessor getRecord(unsigned RecordIndex) const {
+    std::size_t RecordOffset = StackMapRecordOffsets[RecordIndex];
+    return RecordAccessor(StackMapSection.data() + RecordOffset);
+  }
+
+  /// Begin iterator for records.
+  record_iterator records_begin() const {
+    if (getNumRecords() == 0)
+      return record_iterator(RecordAccessor(nullptr));
+    return record_iterator(getRecord(0));
+  }
+
+  /// End iterator for records.
+  record_iterator records_end() const {
+    // Records need to be handled specially, since we cache the start addresses
+    // for them: We can't just compute the 1-past-the-end address, we have to
+    // look at the last record and use the 'next' method.
+    if (getNumRecords() == 0)
+      return record_iterator(RecordAccessor(nullptr));
+    return record_iterator(getRecord(getNumRecords() - 1).next());
+  }
+
+  /// Iterator range for records.
+  iterator_range<record_iterator> records() const {
+    return make_range(records_begin(), records_end());
+  }
+
+private:
+
+  template <typename T>
+  static T read(const uint8_t *P) {
+    return support::endian::read<T, Endianness, 1>(P);
+  }
+
+  static const unsigned HeaderOffset = 0;
+  static const unsigned NumFunctionsOffset = HeaderOffset + sizeof(uint32_t);
+  static const unsigned NumConstantsOffset = NumFunctionsOffset + sizeof(uint32_t);
+  static const unsigned NumRecordsOffset = NumConstantsOffset + sizeof(uint32_t);
+  static const unsigned FunctionListOffset = NumRecordsOffset + sizeof(uint32_t);
+
+  static const unsigned FunctionSize = 2 * sizeof(uint64_t);
+  static const unsigned ConstantSize = sizeof(uint64_t);
+
+  std::size_t getFunctionOffset(unsigned FunctionIndex) const {
+    return FunctionListOffset + FunctionIndex * FunctionSize;
+  }
+
+  std::size_t getConstantOffset(unsigned ConstantIndex) const {
+    return ConstantsListOffset + ConstantIndex * ConstantSize;
+  }
+
+  ArrayRef<uint8_t> StackMapSection;
+  unsigned ConstantsListOffset;
+  std::vector<unsigned> StackMapRecordOffsets;
+};
+
+}
+
+#endif
diff --git a/include/llvm/Object/SymbolSize.h b/include/llvm/Object/SymbolSize.h
new file mode 100644
index 0000000..f2ce70f
--- /dev/null
+++ b/include/llvm/Object/SymbolSize.h
@@ -0,0 +1,23 @@
+//===- SymbolSize.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_SYMBOLSIZE_H
+#define LLVM_OBJECT_SYMBOLSIZE_H
+
+#include "llvm/Object/ObjectFile.h"
+
+namespace llvm {
+namespace object {
+std::vector<std::pair<SymbolRef, uint64_t>>
+computeSymbolSizes(const ObjectFile &O);
+}
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h
index bf46599..3a38231 100644
--- a/include/llvm/Object/SymbolicFile.h
+++ b/include/llvm/Object/SymbolicFile.h
@@ -115,7 +115,7 @@ public:
 
 typedef content_iterator<BasicSymbolRef> basic_symbol_iterator;
 
-const uint64_t UnknownAddressOrSize = ~0ULL;
+const uint64_t UnknownAddress = ~0ULL;
 
 class SymbolicFile : public Binary {
 public:
@@ -195,7 +195,7 @@ inline const SymbolicFile *BasicSymbolRef::getObject() const {
   return OwningObject;
 }
 
-} // namespace object
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h
index 5f6941a..e1b72b6 100644
--- a/include/llvm/Option/Arg.h
+++ b/include/llvm/Option/Arg.h
@@ -93,9 +93,8 @@ public:
     return Values[N];
   }
 
-  SmallVectorImpl<const char*> &getValues() {
-    return Values;
-  }
+  SmallVectorImpl<const char *> &getValues() { return Values; }
+  const SmallVectorImpl<const char *> &getValues() const { return Values; }
 
   bool containsValue(StringRef Value) const {
     for (unsigned i = 0, e = getNumValues(); i != e; ++i)
diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h
index 23b0451..ef40057 100644
--- a/include/llvm/Option/ArgList.h
+++ b/include/llvm/Option/ArgList.h
@@ -14,6 +14,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
 #include "llvm/Option/OptSpecifier.h"
 #include "llvm/Option/Option.h"
 #include <list>
@@ -23,7 +24,6 @@
 
 namespace llvm {
 namespace opt {
-class Arg;
 class ArgList;
 class Option;
 
@@ -92,10 +92,6 @@ public:
 /// check for the presence of Arg instances for a particular Option
 /// and to iterate over groups of arguments.
 class ArgList {
-private:
-  ArgList(const ArgList &) = delete;
-  void operator=(const ArgList &) = delete;
-
 public:
   typedef SmallVector<Arg*, 16> arglist_type;
   typedef arglist_type::iterator iterator;
@@ -108,12 +104,23 @@ private:
   arglist_type Args;
 
 protected:
-  // Default ctor provided explicitly as it is not provided implicitly due to
-  // the presence of the (deleted) copy ctor above.
-  ArgList() { }
-  // Virtual to provide a vtable anchor and because -Wnon-virtua-dtor warns, not
-  // because this type is ever actually destroyed polymorphically.
-  virtual ~ArgList();
+  // Make the default special members protected so they won't be used to slice
+  // derived objects, but can still be used by derived objects to implement
+  // their own special members.
+  ArgList() = default;
+  // Explicit move operations to ensure the container is cleared post-move
+  // otherwise it could lead to a double-delete in the case of moving of an
+  // InputArgList which deletes the contents of the container. If we could fix
+  // up the ownership here (delegate storage/ownership to the derived class so
+  // it can be a container of unique_ptr) this would be simpler.
+  ArgList(ArgList &&RHS) : Args(std::move(RHS.Args)) { RHS.Args.clear(); }
+  ArgList &operator=(ArgList &&RHS) {
+    Args = std::move(RHS.Args);
+    RHS.Args.clear();
+    return *this;
+  }
+  // Protect the dtor to ensure this type is never destroyed polymorphically.
+  ~ArgList() = default;
 
 public:
 
@@ -299,7 +306,7 @@ public:
   /// @}
 };
 
-class InputArgList : public ArgList  {
+class InputArgList final : public ArgList {
 private:
   /// List of argument strings used by the contained Args.
   ///
@@ -318,9 +325,24 @@ private:
   /// The number of original input argument strings.
   unsigned NumInputArgStrings;
 
+  /// Release allocated arguments.
+  void releaseMemory();
+
 public:
   InputArgList(const char* const *ArgBegin, const char* const *ArgEnd);
-  ~InputArgList() override;
+  InputArgList(InputArgList &&RHS)
+      : ArgList(std::move(RHS)), ArgStrings(std::move(RHS.ArgStrings)),
+        SynthesizedStrings(std::move(RHS.SynthesizedStrings)),
+        NumInputArgStrings(RHS.NumInputArgStrings) {}
+  InputArgList &operator=(InputArgList &&RHS) {
+    releaseMemory();
+    ArgList::operator=(std::move(RHS));
+    ArgStrings = std::move(RHS.ArgStrings);
+    SynthesizedStrings = std::move(RHS.SynthesizedStrings);
+    NumInputArgStrings = RHS.NumInputArgStrings;
+    return *this;
+  }
+  ~InputArgList() { releaseMemory(); }
 
   const char *getArgString(unsigned Index) const override {
     return ArgStrings[Index];
@@ -346,7 +368,7 @@ public:
 
 /// DerivedArgList - An ordered collection of driver arguments,
 /// whose storage may be in another argument list.
-class DerivedArgList : public ArgList {
+class DerivedArgList final : public ArgList {
   const InputArgList &BaseArgs;
 
   /// The list of arguments we synthesized.
@@ -355,7 +377,6 @@ class DerivedArgList : public ArgList {
 public:
   /// Construct a new derived arg list from \p BaseArgs.
   DerivedArgList(const InputArgList &BaseArgs);
-  ~DerivedArgList() override;
 
   const char *getArgString(unsigned Index) const override {
     return BaseArgs.getArgString(Index);
diff --git a/include/llvm/Option/OptSpecifier.h b/include/llvm/Option/OptSpecifier.h
index f9b121e..0b2aaae 100644
--- a/include/llvm/Option/OptSpecifier.h
+++ b/include/llvm/Option/OptSpecifier.h
@@ -35,7 +35,7 @@ namespace opt {
     bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); }
     bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); }
   };
-} // namespace opt
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h
index a7ff469..96f51cf 100644
--- a/include/llvm/Option/OptTable.h
+++ b/include/llvm/Option/OptTable.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_OPTION_OPTTABLE_H
 #define LLVM_OPTION_OPTTABLE_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Option/OptSpecifier.h"
 
@@ -141,8 +142,6 @@ public:
   /// The only error that can occur in this routine is if an argument is
   /// missing values; in this case \p MissingArgCount will be non-zero.
   ///
-  /// \param ArgBegin - The beginning of the argument vector.
-  /// \param ArgEnd - The end of the argument vector.
   /// \param MissingArgIndex - On error, the index of the option which could
   /// not be parsed.
   /// \param MissingArgCount - On error, the number of missing options.
@@ -152,12 +151,9 @@ public:
   /// is the default and means exclude nothing.
   /// \return An InputArgList; on error this will contain all the options
   /// which could be parsed.
-  InputArgList *ParseArgs(const char* const *ArgBegin,
-                          const char* const *ArgEnd,
-                          unsigned &MissingArgIndex,
-                          unsigned &MissingArgCount,
-                          unsigned FlagsToInclude = 0,
-                          unsigned FlagsToExclude = 0) const;
+  InputArgList ParseArgs(ArrayRef<const char *> Args, unsigned &MissingArgIndex,
+                         unsigned &MissingArgCount, unsigned FlagsToInclude = 0,
+                         unsigned FlagsToExclude = 0) const;
 
   /// \brief Render the help text for an option table.
   ///
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index ccd6f27..3c4d838 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -369,7 +369,7 @@ protected:
 /// @brief This is the storage for the -time-passes option.
 extern bool TimePassesIsEnabled;
 
-} // namespace llvm
+} // End llvm namespace
 
 // Include support files that contain important APIs commonly used by Passes,
 // but that we want to separate out to make it easier to read the header files.
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index d356097..0b318fc 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -27,28 +27,27 @@
 namespace llvm {
 
 //===----------------------------------------------------------------------===//
-// AnalysisUsage - Represent the analysis usage information of a pass.  This
-// tracks analyses that the pass REQUIRES (must be available when the pass
-// runs), REQUIRES TRANSITIVE (must be available throughout the lifetime of the
-// pass), and analyses that the pass PRESERVES (the pass does not invalidate the
-// results of these analyses).  This information is provided by a pass to the
-// Pass infrastructure through the getAnalysisUsage virtual function.
-//
+/// Represent the analysis usage information of a pass.  This tracks analyses
+/// that the pass REQUIRES (must be available when the pass runs), REQUIRES
+/// TRANSITIVE (must be available throughout the lifetime of the pass), and
+/// analyses that the pass PRESERVES (the pass does not invalidate the results
+/// of these analyses).  This information is provided by a pass to the Pass
+/// infrastructure through the getAnalysisUsage virtual function.
+///
 class AnalysisUsage {
 public:
   typedef SmallVector<AnalysisID, 32> VectorType;
 
 private:
-  // Sets of analyses required and preserved by a pass
+  /// Sets of analyses required and preserved by a pass
   VectorType Required, RequiredTransitive, Preserved;
   bool PreservesAll;
 
 public:
   AnalysisUsage() : PreservesAll(false) {}
 
-  // addRequired - Add the specified ID to the required set of the usage info
-  // for a pass.
-  //
+  ///@{
+  /// Add the specified ID to the required set of the usage info for a pass.
   AnalysisUsage &addRequiredID(const void *ID);
   AnalysisUsage &addRequiredID(char &ID);
   template<class PassClass>
@@ -61,10 +60,10 @@ public:
   AnalysisUsage &addRequiredTransitive() {
     return addRequiredTransitiveID(PassClass::ID);
   }
+  ///@}
 
-  // addPreserved - Add the specified ID to the set of analyses preserved by
-  // this pass
-  //
+  ///@{
+  /// Add the specified ID to the set of analyses preserved by this pass.
   AnalysisUsage &addPreservedID(const void *ID) {
     Preserved.push_back(ID);
     return *this;
@@ -73,29 +72,28 @@ public:
     Preserved.push_back(&ID);
     return *this;
   }
+  ///@}
 
-  // addPreserved - Add the specified Pass class to the set of analyses
-  // preserved by this pass.
-  //
+  /// Add the specified Pass class to the set of analyses preserved by this pass.
   template<class PassClass>
   AnalysisUsage &addPreserved() {
     Preserved.push_back(&PassClass::ID);
     return *this;
   }
 
-  // addPreserved - Add the Pass with the specified argument string to the set
-  // of analyses preserved by this pass. If no such Pass exists, do nothing.
-  // This can be useful when a pass is trivially preserved, but may not be
-  // linked in. Be careful about spelling!
-  //
+  /// Add the Pass with the specified argument string to the set of analyses
+  /// preserved by this pass. If no such Pass exists, do nothing. This can be
+  /// useful when a pass is trivially preserved, but may not be linked in. Be
+  /// careful about spelling!
   AnalysisUsage &addPreserved(StringRef Arg);
 
-  // setPreservesAll - Set by analyses that do not transform their input at all
+  /// Set by analyses that do not transform their input at all
   void setPreservesAll() { PreservesAll = true; }
+
+  /// Determine whether a pass said it does not transform its input at all
   bool getPreservesAll() const { return PreservesAll; }
 
-  /// setPreservesCFG - This function should be called by the pass, iff they do
-  /// not:
+  /// This function should be called by the pass, iff they do not:
   ///
   ///  1. Add or remove basic blocks from the function
   ///  2. Modify terminator instructions in any way.
@@ -113,10 +111,10 @@ public:
 };
 
 //===----------------------------------------------------------------------===//
-// AnalysisResolver - Simple interface used by Pass objects to pull all
-// analysis information out of pass manager that is responsible to manage
-// the pass.
-//
+/// AnalysisResolver - Simple interface used by Pass objects to pull all
+/// analysis information out of pass manager that is responsible to manage
+/// the pass.
+///
 class PMDataManager;
 class AnalysisResolver {
 private:
@@ -124,10 +122,10 @@ private:
 
 public:
   explicit AnalysisResolver(PMDataManager &P) : PM(P) { }
-  
+
   inline PMDataManager &getPMDataManager() { return PM; }
 
-  // Find pass that is implementing PI.
+  /// Find pass that is implementing PI.
   Pass *findImplPass(AnalysisID PI) {
     Pass *ResultPass = nullptr;
     for (unsigned i = 0; i < AnalysisImpls.size() ; ++i) {
@@ -139,7 +137,7 @@ public:
     return ResultPass;
   }
 
-  // Find pass that is implementing PI. Initialize pass for Function F.
+  /// Find pass that is implementing PI. Initialize pass for Function F.
   Pass *findImplPass(Pass *P, AnalysisID PI, Function &F);
 
   void addAnalysisImplsPair(AnalysisID PI, Pass *P) {
@@ -149,21 +147,20 @@ public:
     AnalysisImpls.push_back(pir);
   }
 
-  /// clearAnalysisImpls - Clear cache that is used to connect a pass to the
-  /// the analysis (PassInfo).
+  /// Clear cache that is used to connect a pass to the the analysis (PassInfo).
   void clearAnalysisImpls() {
     AnalysisImpls.clear();
   }
 
-  // getAnalysisIfAvailable - Return analysis result or null if it doesn't exist
+  /// Return analysis result or null if it doesn't exist.
   Pass *getAnalysisIfAvailable(AnalysisID ID, bool Direction) const;
 
 private:
-  // AnalysisImpls - This keeps track of which passes implements the interfaces
-  // that are required by the current pass (to implement getAnalysis()).
+  /// This keeps track of which passes implements the interfaces that are
+  /// required by the current pass (to implement getAnalysis()).
   std::vector<std::pair<AnalysisID, Pass*> > AnalysisImpls;
 
-  // PassManager that is used to resolve analysis info
+  /// PassManager that is used to resolve analysis info
   PMDataManager &PM;
 };
 
@@ -240,7 +237,7 @@ AnalysisType &Pass::getAnalysisID(AnalysisID PI, Function &F) {
   // vector.
   Pass *ResultPass = Resolver->findImplPass(this, PI, F);
   assert(ResultPass && "Unable to find requested analysis info");
-  
+
   // Because the AnalysisType may not be a subclass of pass (for
   // AnalysisGroups), we use getAdjustedAnalysisPointer here to potentially
   // adjust the return pointer (because the class may multiply inherit, once
@@ -248,6 +245,6 @@ AnalysisType &Pass::getAnalysisID(AnalysisID PI, Function &F) {
   return *(AnalysisType*)ResultPass->getAdjustedAnalysisPointer(PI);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/PassInfo.h b/include/llvm/PassInfo.h
index 6a2f942..d107618 100644
--- a/include/llvm/PassInfo.h
+++ b/include/llvm/PassInfo.h
@@ -142,6 +142,6 @@ private:
   PassInfo(const PassInfo &) = delete;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h
index 0d2cd24..8c28ef5 100644
--- a/include/llvm/PassRegistry.h
+++ b/include/llvm/PassRegistry.h
@@ -95,6 +95,6 @@ public:
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassRegistry, LLVMPassRegistryRef)
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index af1a195..6cb6516 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -245,6 +245,6 @@ struct PassRegistrationListener {
 };
 
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h
index bbf80f8..1e605e3 100644
--- a/include/llvm/Passes/PassBuilder.h
+++ b/include/llvm/Passes/PassBuilder.h
@@ -100,6 +100,6 @@ private:
                                bool VerifyEachPass, bool DebugLogging);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/ProfileData/CoverageMapping.h b/include/llvm/ProfileData/CoverageMapping.h
index 94e655c..3488e79 100644
--- a/include/llvm/ProfileData/CoverageMapping.h
+++ b/include/llvm/ProfileData/CoverageMapping.h
@@ -410,7 +410,7 @@ public:
   /// \brief Load the coverage mapping from the given files.
   static ErrorOr<std::unique_ptr<CoverageMapping>>
   load(StringRef ObjectFilename, StringRef ProfileFilename,
-       Triple::ArchType Arch = Triple::ArchType::UnknownArch);
+       StringRef Arch = StringRef());
 
   /// \brief The number of functions that couldn't have their profiles mapped.
   ///
diff --git a/include/llvm/ProfileData/CoverageMappingReader.h b/include/llvm/ProfileData/CoverageMappingReader.h
index 020edbd..38fb468 100644
--- a/include/llvm/ProfileData/CoverageMappingReader.h
+++ b/include/llvm/ProfileData/CoverageMappingReader.h
@@ -171,7 +171,7 @@ private:
 public:
   static ErrorOr<std::unique_ptr<BinaryCoverageReader>>
   create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
-         Triple::ArchType Arch = Triple::ArchType::UnknownArch);
+         StringRef Arch);
 
   std::error_code readNextRecord(CoverageMappingRecord &Record) override;
 };
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
index eafb768..77055ba 100644
--- a/include/llvm/ProfileData/InstrProf.h
+++ b/include/llvm/ProfileData/InstrProf.h
@@ -16,7 +16,10 @@
 #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
 #define LLVM_PROFILEDATA_INSTRPROF_H_
 
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
 #include <system_error>
+#include <vector>
 
 namespace llvm {
 const std::error_category &instrprof_category();
@@ -41,6 +44,16 @@ inline std::error_code make_error_code(instrprof_error E) {
   return std::error_code(static_cast<int>(E), instrprof_category());
 }
 
+/// Profiling information for a single function.
+struct InstrProfRecord {
+  InstrProfRecord() {}
+  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts)
+      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
+  StringRef Name;
+  uint64_t Hash;
+  std::vector<uint64_t> Counts;
+};
+
 } // end namespace llvm
 
 namespace std {
diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
index 63a6ac6..f937e7d 100644
--- a/include/llvm/ProfileData/InstrProfReader.h
+++ b/include/llvm/ProfileData/InstrProfReader.h
@@ -29,16 +29,6 @@ namespace llvm {
 
 class InstrProfReader;
 
-/// Profiling information for a single function.
-struct InstrProfRecord {
-  InstrProfRecord() {}
-  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
-      : Name(Name), Hash(Hash), Counts(Counts) {}
-  StringRef Name;
-  uint64_t Hash;
-  ArrayRef<uint64_t> Counts;
-};
-
 /// A file format agnostic iterator over profiling data.
 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
                                                InstrProfRecord> {
@@ -114,8 +104,6 @@ private:
   std::unique_ptr<MemoryBuffer> DataBuffer;
   /// Iterator over the profile data.
   line_iterator Line;
-  /// The current set of counter values.
-  std::vector<uint64_t> Counts;
 
   TextInstrProfReader(const TextInstrProfReader &) = delete;
   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
@@ -141,8 +129,6 @@ class RawInstrProfReader : public InstrProfReader {
 private:
   /// The profile data file contents.
   std::unique_ptr<MemoryBuffer> DataBuffer;
-  /// The current set of counter values.
-  std::vector<uint64_t> Counts;
   struct ProfileData {
     const uint32_t NameSize;
     const uint32_t NumCounters;
@@ -206,17 +192,16 @@ enum class HashT : uint32_t;
 /// Trait for lookups into the on-disk hash table for the binary instrprof
 /// format.
 class InstrProfLookupTrait {
-  std::vector<uint64_t> DataBuffer;
+  std::vector<InstrProfRecord> DataBuffer;
   IndexedInstrProf::HashT HashType;
+  unsigned FormatVersion;
+
 public:
-  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}
+  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
+      : HashType(HashType), FormatVersion(FormatVersion) {}
+
+  typedef ArrayRef<InstrProfRecord> data_type;
 
-  struct data_type {
-    data_type(StringRef Name, ArrayRef<uint64_t> Data)
-        : Name(Name), Data(Data) {}
-    StringRef Name;
-    ArrayRef<uint64_t> Data;
-  };
   typedef StringRef internal_key_type;
   typedef StringRef external_key_type;
   typedef uint64_t hash_value_type;
@@ -239,22 +224,9 @@ public:
     return StringRef((const char *)D, N);
   }
 
-  data_type ReadData(StringRef K, const unsigned char *D, offset_type N) {
-    DataBuffer.clear();
-    if (N % sizeof(uint64_t))
-      // The data is corrupt, don't try to read it.
-      return data_type("", DataBuffer);
-
-    using namespace support;
-    // We just treat the data as opaque here. It's simpler to handle in
-    // IndexedInstrProfReader.
-    unsigned NumEntries = N / sizeof(uint64_t);
-    DataBuffer.reserve(NumEntries);
-    for (unsigned I = 0; I < NumEntries; ++I)
-      DataBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
-    return data_type(K, DataBuffer);
-  }
+  data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
 };
+
 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
     InstrProfReaderIndex;
 
@@ -267,8 +239,6 @@ private:
   std::unique_ptr<InstrProfReaderIndex> Index;
   /// Iterator over the profile data.
   InstrProfReaderIndex::data_iterator RecordIterator;
-  /// Offset into our current data set.
-  size_t CurrentOffset;
   /// The file format version of the profile data.
   uint64_t FormatVersion;
   /// The maximal execution count among all functions.
@@ -278,7 +248,7 @@ private:
   IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
 public:
   IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
-      : DataBuffer(std::move(DataBuffer)), Index(nullptr), CurrentOffset(0) {}
+      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
 
   /// Return true if the given buffer is in an indexed instrprof format.
   static bool hasFormat(const MemoryBuffer &DataBuffer);
diff --git a/include/llvm/Support/ARMEHABI.h b/include/llvm/Support/ARMEHABI.h
index db045a8..9b052df 100644
--- a/include/llvm/Support/ARMEHABI.h
+++ b/include/llvm/Support/ARMEHABI.h
@@ -127,8 +127,8 @@ namespace EHABI {
 
     NUM_PERSONALITY_INDEX
   };
-} // namespace EHABI
-} // namespace ARM
-} // namespace llvm
+}
+}
+}
 
 #endif
diff --git a/include/llvm/Support/ARMWinEH.h b/include/llvm/Support/ARMWinEH.h
index 0b37903..1463629 100644
--- a/include/llvm/Support/ARMWinEH.h
+++ b/include/llvm/Support/ARMWinEH.h
@@ -375,8 +375,8 @@ struct ExceptionDataRecord {
 inline size_t HeaderWords(const ExceptionDataRecord &XR) {
   return (XR.Data[0] & 0xff800000) ? 1 : 2;
 }
-} // namespace WinEH
-} // namespace ARM
-} // namespace llvm
+}
+}
+}
 
 #endif
diff --git a/include/llvm/Support/ArrayRecycler.h b/include/llvm/Support/ArrayRecycler.h
index 5907c79..36f644a 100644
--- a/include/llvm/Support/ArrayRecycler.h
+++ b/include/llvm/Support/ArrayRecycler.h
@@ -138,6 +138,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/Atomic.h b/include/llvm/Support/Atomic.h
index a3cec47..9ec23e8 100644
--- a/include/llvm/Support/Atomic.h
+++ b/include/llvm/Support/Atomic.h
@@ -33,7 +33,7 @@ namespace llvm {
     cas_flag AtomicAdd(volatile cas_flag* ptr, cas_flag val);
     cas_flag AtomicMul(volatile cas_flag* ptr, cas_flag val);
     cas_flag AtomicDiv(volatile cas_flag* ptr, cas_flag val);
-  } // namespace sys
-} // namespace llvm
+  }
+}
 
 #endif
diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h
index 20b2782..4304a25 100644
--- a/include/llvm/Support/BlockFrequency.h
+++ b/include/llvm/Support/BlockFrequency.h
@@ -69,6 +69,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h
index df89d2d..a6429dd 100644
--- a/include/llvm/Support/BranchProbability.h
+++ b/include/llvm/Support/BranchProbability.h
@@ -84,6 +84,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) {
   return Prob.print(OS);
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/COM.h b/include/llvm/Support/COM.h
index 45559b0..a2d5a7a 100644
--- a/include/llvm/Support/COM.h
+++ b/include/llvm/Support/COM.h
@@ -30,7 +30,7 @@ private:
   InitializeCOMRAII(const InitializeCOMRAII &) = delete;
   void operator=(const InitializeCOMRAII &) = delete;
 };
-} // namespace sys
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index e84676a..6ba5efa 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -321,6 +321,6 @@ dyn_cast_or_null(Y *Val) {
   return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/CodeGen.h b/include/llvm/Support/CodeGen.h
index 1eca568..243f2dd 100644
--- a/include/llvm/Support/CodeGen.h
+++ b/include/llvm/Support/CodeGen.h
@@ -90,6 +90,6 @@ namespace llvm {
     }
     llvm_unreachable("Bad CodeModel!");
   }
-} // namespace llvm
+}  // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h
index 13aff7a..c08c3c1 100644
--- a/include/llvm/Support/CrashRecoveryContext.h
+++ b/include/llvm/Support/CrashRecoveryContext.h
@@ -199,6 +199,6 @@ public:
     cleanup = 0;
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 3d21129..95e37c0 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -161,6 +161,6 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits {
   DOTGraphTraits (bool simple=false) : DefaultDOTGraphTraits (simple) {}
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/DataStream.h b/include/llvm/Support/DataStream.h
index 9a4daec..a544316 100644
--- a/include/llvm/Support/DataStream.h
+++ b/include/llvm/Support/DataStream.h
@@ -33,6 +33,6 @@ public:
 
 std::unique_ptr<DataStreamer> getDataFileStreamer(const std::string &Filename,
                                                   std::string *Err);
-} // namespace llvm
+}
 
 #endif  // LLVM_SUPPORT_DATASTREAM_H_
diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h
index 2f3fe77..fff4f98 100644
--- a/include/llvm/Support/Debug.h
+++ b/include/llvm/Support/Debug.h
@@ -91,6 +91,6 @@ raw_ostream &dbgs();
 //
 #define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index c3d94d1..17e9c15 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -239,6 +239,11 @@ enum Attribute : uint16_t {
   DW_AT_GNU_pubnames = 0x2134,
   DW_AT_GNU_pubtypes = 0x2135,
 
+  // LLVM project extensions.
+  DW_AT_LLVM_include_path = 0x3e00,
+  DW_AT_LLVM_config_macros = 0x3e01,
+  DW_AT_LLVM_isysroot = 0x3e02,
+
   // Apple extensions.
   DW_AT_APPLE_optimized = 0x3fe1,
   DW_AT_APPLE_flags = 0x3fe2,
diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h
index d6ff904..a7d2221 100644
--- a/include/llvm/Support/DynamicLibrary.h
+++ b/include/llvm/Support/DynamicLibrary.h
@@ -99,7 +99,7 @@ namespace sys {
     static void AddSymbol(StringRef symbolName, void *symbolValue);
   };
 
-} // namespace sys
-} // namespace llvm
+} // End sys namespace
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index e23fcbb..94a4bfb 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -308,7 +308,8 @@ enum {
   EM_COGE          = 216, // Cognitive Smart Memory Processor
   EM_COOL          = 217, // iCelero CoolEngine
   EM_NORC          = 218, // Nanoradio Optimized RISC
-  EM_CSR_KALIMBA   = 219  // CSR Kalimba architecture family
+  EM_CSR_KALIMBA   = 219, // CSR Kalimba architecture family
+  EM_AMDGPU        = 224  // AMD GPU architecture
 };
 
 // Object file classes.
@@ -346,6 +347,7 @@ enum {
   ELFOSABI_FENIXOS = 16,      // FenixOS
   ELFOSABI_CLOUDABI = 17,     // Nuxi CloudABI
   ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000
+  ELFOSABI_AMDGPU_HSA = 64,   // AMD HSA runtime
   ELFOSABI_C6000_LINUX = 65,  // Linux TMS320C6000
   ELFOSABI_ARM = 97,          // ARM
   ELFOSABI_STANDALONE = 255   // Standalone (embedded) application
@@ -822,9 +824,9 @@ enum {
   STT_FILE    = 4,   // Local, absolute symbol that refers to a file
   STT_COMMON  = 5,   // An uninitialized common block
   STT_TLS     = 6,   // Thread local data object
-  STT_LOOS    = 7,   // Lowest operating system-specific symbol type
-  STT_HIOS    = 8,   // Highest operating system-specific symbol type
   STT_GNU_IFUNC = 10, // GNU indirect function
+  STT_LOOS    = 10,  // Lowest operating system-specific symbol type
+  STT_HIOS    = 12,  // Highest operating system-specific symbol type
   STT_LOPROC  = 13,  // Lowest processor-specific symbol type
   STT_HIPROC  = 15   // Highest processor-specific symbol type
 };
diff --git a/include/llvm/Support/Errc.h b/include/llvm/Support/Errc.h
index 7efca02..80bfe2a 100644
--- a/include/llvm/Support/Errc.h
+++ b/include/llvm/Support/Errc.h
@@ -78,7 +78,7 @@ enum class errc {
 inline std::error_code make_error_code(errc E) {
   return std::error_code(static_cast<int>(E), std::generic_category());
 }
-} // namespace llvm
+}
 
 namespace std {
 template <> struct is_error_code_enum<llvm::errc> : std::true_type {};
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 427d8ea..9afd52d 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -84,7 +84,7 @@ namespace llvm {
   LLVM_ATTRIBUTE_NORETURN void
   llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr,
                             unsigned line=0);
-} // namespace llvm
+}
 
 /// Marks that the current location is not supposed to be reachable.
 /// In !NDEBUG builds, prints the message and location info to stderr.
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index 5a857e4..a736c32 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -724,7 +724,7 @@ namespace detail {
     intptr_t IterationHandle;
     directory_entry CurrentEntry;
   };
-} // namespace detail
+}
 
 /// directory_iterator - Iterates through the entries in path. There is no
 /// operator++ because we need an error_code. If it's really needed we can make
@@ -786,7 +786,7 @@ namespace detail {
     uint16_t Level;
     bool HasNoPushRequest;
   };
-} // namespace detail
+}
 
 /// recursive_directory_iterator - Same as directory_iterator except for it
 /// recurses down into child directories.
diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h
index 8a790de..2ee2c60 100644
--- a/include/llvm/Support/FileUtilities.h
+++ b/include/llvm/Support/FileUtilities.h
@@ -73,6 +73,6 @@ namespace llvm {
     /// will not be removed when the object is destroyed.
     void releaseFile() { DeleteIt = false; }
   };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 145d898..4a135cd 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -156,7 +156,7 @@ formatted_raw_ostream &ferrs();
 /// debug output.  Use it like: fdbgs() << "foo" << "bar";
 formatted_raw_ostream &fdbgs();
 
-} // namespace llvm
+} // end llvm namespace
 
 
 #endif
diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h
index 138b9db..c2e34bd 100644
--- a/include/llvm/Support/GCOV.h
+++ b/include/llvm/Support/GCOV.h
@@ -435,6 +435,6 @@ private:
   FileCoverageList FileCoverages;
   FuncCoverageMap FuncCoverages;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index cd59f82..63678bb 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -772,6 +772,6 @@ bool DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A,
                    getNode(const_cast<NodeT *>(B)));
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h
index 76e3cc8..7c065f9 100644
--- a/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/include/llvm/Support/GenericDomTreeConstruction.h
@@ -288,6 +288,6 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
   DT.updateDFSNumbers();
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 04b4084..b1af3d7 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -353,9 +353,9 @@ void ViewGraph(const GraphType &G, const Twine &Name,
   if (Filename.empty())
     return;
 
-  DisplayGraph(Filename, true, Program);
+  DisplayGraph(Filename, false, Program);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index f2519df..8f4bf3c 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -68,7 +68,7 @@ namespace sys {
   ///
   /// \return - True on success.
   bool getHostCPUFeatures(StringMap<bool> &Features);
-} // namespace sys
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Support/LineIterator.h b/include/llvm/Support/LineIterator.h
index d0f7d30..9d4cd3b 100644
--- a/include/llvm/Support/LineIterator.h
+++ b/include/llvm/Support/LineIterator.h
@@ -83,6 +83,6 @@ private:
   /// \brief Advance the iterator to the next line.
   void advance();
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/MD5.h b/include/llvm/Support/MD5.h
index 8658c8e..f6e1e92 100644
--- a/include/llvm/Support/MD5.h
+++ b/include/llvm/Support/MD5.h
@@ -65,6 +65,6 @@ private:
   const uint8_t *body(ArrayRef<uint8_t> Data);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index 1187e05..addd34e 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -106,6 +106,6 @@ struct llvm_shutdown_obj {
   ~llvm_shutdown_obj() { llvm_shutdown(); }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 7c63aaa..2cf7e0e 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -642,6 +642,6 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) {
 }
 
 extern const float huge_valf;
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
index 6abb17a..b4305cb 100644
--- a/include/llvm/Support/Memory.h
+++ b/include/llvm/Support/Memory.h
@@ -155,7 +155,7 @@ namespace sys {
     /// as writable.
     static bool setRangeWritable(const void *Addr, size_t Size);
   };
-} // namespace sys
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Support/MemoryObject.h b/include/llvm/Support/MemoryObject.h
index deff6c1..e0c8749 100644
--- a/include/llvm/Support/MemoryObject.h
+++ b/include/llvm/Support/MemoryObject.h
@@ -63,6 +63,6 @@ public:
   virtual bool isValidAddress(uint64_t address) const = 0;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/MipsABIFlags.h b/include/llvm/Support/MipsABIFlags.h
index 8740823..93f6b41 100644
--- a/include/llvm/Support/MipsABIFlags.h
+++ b/include/llvm/Support/MipsABIFlags.h
@@ -96,7 +96,7 @@ enum Val_GNU_MIPS_ABI_MSA {
   Val_GNU_MIPS_ABI_MSA_ANY = 0, // not tagged
   Val_GNU_MIPS_ABI_MSA_128 = 1  // 128-bit MSA
 };
-} // namespace Mips
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h
index 47f0ab6..0f4e61a 100644
--- a/include/llvm/Support/Mutex.h
+++ b/include/llvm/Support/Mutex.h
@@ -152,7 +152,7 @@ namespace llvm
     };
 
     typedef SmartScopedLock<false> ScopedLock;
-  } // namespace sys
-} // namespace llvm
+  }
+}
 
 #endif
diff --git a/include/llvm/Support/MutexGuard.h b/include/llvm/Support/MutexGuard.h
index ea58617..07b64b6 100644
--- a/include/llvm/Support/MutexGuard.h
+++ b/include/llvm/Support/MutexGuard.h
@@ -36,6 +36,6 @@ namespace llvm {
     /// is held.
     bool holds(const sys::Mutex& lock) const { return &M == &lock; }
   };
-} // namespace llvm
+}
 
 #endif // LLVM_SUPPORT_MUTEXGUARD_H
diff --git a/include/llvm/Support/PluginLoader.h b/include/llvm/Support/PluginLoader.h
index da4324e..bdbb134 100644
--- a/include/llvm/Support/PluginLoader.h
+++ b/include/llvm/Support/PluginLoader.h
@@ -32,6 +32,6 @@ namespace llvm {
     LoadOpt("load", cl::ZeroOrMore, cl::value_desc("pluginfilename"),
             cl::desc("Load the specified plugin"));
 #endif
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h
index 089894c..cfdd06c 100644
--- a/include/llvm/Support/Process.h
+++ b/include/llvm/Support/Process.h
@@ -184,7 +184,7 @@ public:
   static unsigned GetRandomNumber();
 };
 
-} // namespace sys
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index 5f1bc12..b89a0f7 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -187,7 +187,7 @@ struct ProcessInfo {
       ///< string is non-empty upon return an error occurred while invoking the
       ///< program.
       );
-  } // namespace sys
-} // namespace llvm
+  }
+}
 
 #endif
diff --git a/include/llvm/Support/RWMutex.h b/include/llvm/Support/RWMutex.h
index 5299708..4be9313 100644
--- a/include/llvm/Support/RWMutex.h
+++ b/include/llvm/Support/RWMutex.h
@@ -171,7 +171,7 @@ namespace llvm
       }
     };
     typedef SmartScopedWriter<false> ScopedWriter;
-  } // namespace sys
-} // namespace llvm
+  }
+}
 
 #endif
diff --git a/include/llvm/Support/RandomNumberGenerator.h b/include/llvm/Support/RandomNumberGenerator.h
index 316778b..7446558 100644
--- a/include/llvm/Support/RandomNumberGenerator.h
+++ b/include/llvm/Support/RandomNumberGenerator.h
@@ -53,6 +53,6 @@ private:
 
   friend class Module;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h
index a909b9d..e97f36a 100644
--- a/include/llvm/Support/Recycler.h
+++ b/include/llvm/Support/Recycler.h
@@ -123,6 +123,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/RecyclingAllocator.h b/include/llvm/Support/RecyclingAllocator.h
index fded4ed..001d1cf 100644
--- a/include/llvm/Support/RecyclingAllocator.h
+++ b/include/llvm/Support/RecyclingAllocator.h
@@ -57,7 +57,7 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 template<class AllocatorType, class T, size_t Size, size_t Align>
 inline void *operator new(size_t size,
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index 15f20a6..31b35ed 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -100,6 +100,6 @@ namespace llvm {
     struct llvm_regex *preg;
     int error;
   };
-} // namespace llvm
+}
 
 #endif // LLVM_SUPPORT_REGEX_H
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h
index 7eb1090..95c4e96 100644
--- a/include/llvm/Support/Registry.h
+++ b/include/llvm/Support/Registry.h
@@ -228,6 +228,6 @@ namespace llvm {
   template <typename T, typename U>
   typename Registry<T,U>::listener *Registry<T,U>::ListenerTail;
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
index 0cb421b..7e165d7 100644
--- a/include/llvm/Support/Signals.h
+++ b/include/llvm/Support/Signals.h
@@ -62,7 +62,7 @@ namespace sys {
   /// different thread on some platforms.
   /// @brief Register a function to be called when ctrl-c is pressed.
   void SetInterruptFunction(void (*IF)());
-} // namespace sys
-} // namespace llvm
+} // End sys namespace
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 5eef9a0..1f8b1a018 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -280,6 +280,6 @@ public:
              bool ShowKindLabel = true) const;
 };
 
-} // namespace llvm
+}  // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/StreamingMemoryObject.h b/include/llvm/Support/StreamingMemoryObject.h
index fe0cc7e..7cb6438 100644
--- a/include/llvm/Support/StreamingMemoryObject.h
+++ b/include/llvm/Support/StreamingMemoryObject.h
@@ -89,5 +89,5 @@ private:
 MemoryObject *getNonStreamedMemoryObject(
     const unsigned char *Start, const unsigned char *End);
 
-} // namespace llvm
+}
 #endif  // STREAMINGMEMORYOBJECT_H_
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index 3aa826b..2ec0c3b 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -133,6 +133,6 @@ namespace llvm {
     inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; }
   };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/StringSaver.h b/include/llvm/Support/StringSaver.h
index c7a2e8f..f3853ee 100644
--- a/include/llvm/Support/StringSaver.h
+++ b/include/llvm/Support/StringSaver.h
@@ -38,5 +38,5 @@ class BumpPtrStringSaver final : public StringSaver {
 public:
   BumpPtrStringSaver(BumpPtrAllocator &Alloc) : StringSaver(Alloc) {}
 };
-} // namespace llvm
+}
 #endif
diff --git a/include/llvm/Support/SystemUtils.h b/include/llvm/Support/SystemUtils.h
index f8c5dc8..2997b1b 100644
--- a/include/llvm/Support/SystemUtils.h
+++ b/include/llvm/Support/SystemUtils.h
@@ -27,6 +27,6 @@ bool CheckBitcodeOutputToConsole(
   bool print_warning = true     ///< Control whether warnings are printed
 );
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h
index 777ee20..dab7248 100644
--- a/include/llvm/Support/TargetParser.h
+++ b/include/llvm/Support/TargetParser.h
@@ -36,7 +36,11 @@ namespace ARM {
     FK_VFP,
     FK_VFPV2,
     FK_VFPV3,
+    FK_VFPV3_FP16,
     FK_VFPV3_D16,
+    FK_VFPV3_D16_FP16,
+    FK_VFPV3XD,
+    FK_VFPV3XD_FP16,
     FK_VFPV4,
     FK_VFPV4_D16,
     FK_FPV4_SP_D16,
@@ -44,6 +48,7 @@ namespace ARM {
     FK_FPV5_SP_D16,
     FK_FP_ARMV8,
     FK_NEON,
+    FK_NEON_FP16,
     FK_NEON_VFPV4,
     FK_NEON_FP_ARMV8,
     FK_CRYPTO_NEON_FP_ARMV8,
@@ -51,6 +56,16 @@ namespace ARM {
     FK_LAST
   };
 
+  // FPU Version
+  enum FPUVersion {
+    FV_NONE = 0,
+    FV_VFPV2,
+    FV_VFPV3,
+    FV_VFPV3_FP16,
+    FV_VFPV4,
+    FV_VFPV5
+  };
+
   // An FPU name implies one of three levels of Neon support:
   enum NeonSupportLevel {
     NS_None = 0, ///< No Neon
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index 1c11ef3..d2e8b95 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -1178,6 +1178,6 @@ private:
     return new MCCodeEmitterImpl();
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/TargetSelect.h b/include/llvm/Support/TargetSelect.h
index 96ecf0b..a86e953 100644
--- a/include/llvm/Support/TargetSelect.h
+++ b/include/llvm/Support/TargetSelect.h
@@ -161,6 +161,6 @@ namespace llvm {
 #endif
   }  
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/ThreadLocal.h b/include/llvm/Support/ThreadLocal.h
index db61f5c..427a67e 100644
--- a/include/llvm/Support/ThreadLocal.h
+++ b/include/llvm/Support/ThreadLocal.h
@@ -57,7 +57,7 @@ namespace llvm {
       // erase - Removes the pointer associated with the current thread.
       void erase() { removeInstance(); }
     };
-  } // namespace sys
-} // namespace llvm
+  }
+}
 
 #endif
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
index 365fb9e..3cca1d6 100644
--- a/include/llvm/Support/Threading.h
+++ b/include/llvm/Support/Threading.h
@@ -34,6 +34,6 @@ namespace llvm {
   /// the thread stack.
   void llvm_execute_on_thread(void (*UserFn)(void*), void *UserData,
                               unsigned RequestedStackSize = 0);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Support/TimeValue.h b/include/llvm/Support/TimeValue.h
index a9efb1b..6bca58b 100644
--- a/include/llvm/Support/TimeValue.h
+++ b/include/llvm/Support/TimeValue.h
@@ -380,7 +380,7 @@ inline TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2) {
   return difference;
 }
 
-} // namespace sys
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index 56fbccc..2cd30e2 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -184,6 +184,6 @@ private:
   void PrintQueuedTimers(raw_ostream &OS);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Support/ToolOutputFile.h b/include/llvm/Support/ToolOutputFile.h
index e7a6545..1be26c2 100644
--- a/include/llvm/Support/ToolOutputFile.h
+++ b/include/llvm/Support/ToolOutputFile.h
@@ -58,6 +58,6 @@ public:
   void keep() { Installer.Keep = true; }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/UniqueLock.h b/include/llvm/Support/UniqueLock.h
index c5f37a7..529284d 100644
--- a/include/llvm/Support/UniqueLock.h
+++ b/include/llvm/Support/UniqueLock.h
@@ -62,6 +62,6 @@ namespace llvm {
 
     bool owns_lock() { return locked; }
   };
-} // namespace llvm
+}
 
 #endif // LLVM_SUPPORT_UNIQUE_LOCK_H
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
index 7eabca9..cebf75c 100644
--- a/include/llvm/Support/Valgrind.h
+++ b/include/llvm/Support/Valgrind.h
@@ -67,7 +67,7 @@ namespace sys {
   #define TsanIgnoreWritesBegin()
   #define TsanIgnoreWritesEnd()
 #endif
-} // namespace sys
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/include/llvm/Support/Watchdog.h b/include/llvm/Support/Watchdog.h
index 5642ae2..01e1d92 100644
--- a/include/llvm/Support/Watchdog.h
+++ b/include/llvm/Support/Watchdog.h
@@ -32,7 +32,7 @@ namespace llvm {
       Watchdog(const Watchdog &other) = delete;
       Watchdog &operator=(const Watchdog &other) = delete;
     };
-  } // namespace sys
-} // namespace llvm
+  }
+}
 
 #endif
diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h
index 89d6421..19f9c2c 100644
--- a/include/llvm/Support/circular_raw_ostream.h
+++ b/include/llvm/Support/circular_raw_ostream.h
@@ -152,7 +152,7 @@ namespace llvm
         delete TheStream;
     }
   };
-} // namespace llvm
+} // end llvm namespace
 
 
 #endif
diff --git a/include/llvm/Support/raw_os_ostream.h b/include/llvm/Support/raw_os_ostream.h
index c13e779..a983aeb 100644
--- a/include/llvm/Support/raw_os_ostream.h
+++ b/include/llvm/Support/raw_os_ostream.h
@@ -37,6 +37,6 @@ public:
   ~raw_os_ostream() override;
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 4b4f933..b593171 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -545,6 +545,6 @@ public:
   ~buffer_ostream() { OS << str(); }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index 6e2e202..45465ae 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -91,7 +91,7 @@ struct add_const_past_pointer<
   typedef const typename std::remove_pointer<T>::type *type;
 };
 
-} // namespace llvm
+}
 
 #ifdef LLVM_DEFINED_HAS_FEATURE
 #undef __has_feature
diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h
index 2ecc9d2..3df658d 100644
--- a/include/llvm/TableGen/Error.h
+++ b/include/llvm/TableGen/Error.h
@@ -34,6 +34,6 @@ LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
 extern SourceMgr SrcMgr;
 extern unsigned ErrorsPrinted;
 
-} // namespace llvm
+} // end namespace "llvm"
 
 #endif
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index c5a4301..717a2a4 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -1161,7 +1161,7 @@ class Record {
   // Tracks Record instances. Not owned by Record.
   RecordKeeper &TrackedRecords;
 
-  DefInit *TheInit;
+  std::unique_ptr<DefInit> TheInit;
   bool IsAnonymous;
 
   // Class-instance values can be used by other defs.  For example, Struct<i>
@@ -1184,8 +1184,7 @@ public:
   explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records,
                   bool Anonymous = false) :
     ID(LastID++), Name(N), Locs(locs.begin(), locs.end()),
-    TrackedRecords(records), TheInit(nullptr), IsAnonymous(Anonymous),
-    ResolveFirst(false) {
+    TrackedRecords(records), IsAnonymous(Anonymous), ResolveFirst(false) {
     init();
   }
   explicit Record(const std::string &N, ArrayRef<SMLoc> locs,
@@ -1194,12 +1193,13 @@ public:
 
 
   // When copy-constructing a Record, we must still guarantee a globally unique
-  // ID number.  All other fields can be copied normally.
+  // ID number.  Don't copy TheInit either since it's owned by the original
+  // record. All other fields can be copied normally.
   Record(const Record &O) :
     ID(LastID++), Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
     Values(O.Values), SuperClasses(O.SuperClasses),
     SuperClassRanges(O.SuperClassRanges), TrackedRecords(O.TrackedRecords),
-    TheInit(O.TheInit), IsAnonymous(O.IsAnonymous),
+    IsAnonymous(O.IsAnonymous),
     ResolveFirst(O.ResolveFirst) { }
 
   static unsigned getNewUID() { return LastID++; }
@@ -1589,6 +1589,6 @@ Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
 Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
                   const std::string &Name, const std::string &Scoper);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h
index 5a77f5e..b438779 100644
--- a/include/llvm/TableGen/StringMatcher.h
+++ b/include/llvm/TableGen/StringMatcher.h
@@ -49,6 +49,6 @@ private:
                                 unsigned CharNo, unsigned IndentCount) const;
 };
 
-} // namespace llvm
+} // end llvm namespace.
 
 #endif
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index 11a2cfd..9d4e7a0 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -190,8 +190,8 @@ namespace ISD {
       ArgVT = argvt;
     }
   };
-} // namespace ISD
+}
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index 2e8fe21..0e31724 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -283,6 +283,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index ec7aef3..8b314f4 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -1270,6 +1270,6 @@ private:
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetIntrinsicInfo.h b/include/llvm/Target/TargetIntrinsicInfo.h
index 3732959..c630f5b 100644
--- a/include/llvm/Target/TargetIntrinsicInfo.h
+++ b/include/llvm/Target/TargetIntrinsicInfo.h
@@ -60,6 +60,6 @@ public:
                                    unsigned numTys = 0) const = 0;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index a536e00..277487f 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1235,11 +1235,10 @@ protected:
     HasExtractBitsInsn = hasExtractInsn;
   }
 
-  /// Tells the code generator not to expand sequence of operations into a
-  /// separate sequences that increases the amount of flow control.
-  void setJumpIsExpensive(bool isExpensive = true) {
-    JumpIsExpensive = isExpensive;
-  }
+  /// Tells the code generator not to expand logic operations on comparison
+  /// predicates into separate sequences that increase the amount of flow
+  /// control.
+  void setJumpIsExpensive(bool isExpensive = true);
 
   /// Tells the code generator that integer divide is expensive, and if
   /// possible, should be replaced by an alternate sequence of instructions not
@@ -1597,6 +1596,35 @@ public:
     return false;
   }
 
+  /// \brief Get the maximum supported factor for interleaved memory accesses.
+  /// Default to be the minimum interleave factor: 2.
+  virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
+
+  /// \brief Lower an interleaved load to target specific intrinsics. Return
+  /// true on success.
+  ///
+  /// \p LI is the vector load instruction.
+  /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
+  /// \p Indices is the corresponding indices for each shufflevector.
+  /// \p Factor is the interleave factor.
+  virtual bool lowerInterleavedLoad(LoadInst *LI,
+                                    ArrayRef<ShuffleVectorInst *> Shuffles,
+                                    ArrayRef<unsigned> Indices,
+                                    unsigned Factor) const {
+    return false;
+  }
+
+  /// \brief Lower an interleaved store to target specific intrinsics. Return
+  /// true on success.
+  ///
+  /// \p SI is the vector store instruction.
+  /// \p SVI is the shufflevector to RE-interleave the stored vector.
+  /// \p Factor is the interleave factor.
+  virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+                                     unsigned Factor) const {
+    return false;
+  }
+
   /// Return true if zero-extending the specific node Val to type VT2 is free
   /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
   /// because it's folded such as X86 zero-extending loads).
@@ -2689,8 +2717,6 @@ public:
   //===--------------------------------------------------------------------===//
   // Div utility functions
   //
-  SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
-                         SelectionDAG &DAG) const;
   SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                     bool IsAfterLegalization,
                     std::vector<SDNode *> *Created) const;
@@ -2801,6 +2827,6 @@ void GetReturnInfo(Type* ReturnType, AttributeSet attr,
                    SmallVectorImpl<ISD::OutputArg> &Outs,
                    const TargetLowering &TLI);
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 2a17bd2..5b626c2 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -64,12 +64,6 @@ public:
                                     const TargetMachine &TM,
                                     const MCSymbol *Sym) const;
 
-  /// Extract the dependent library name from a linker option string. Returns
-  /// StringRef() if the option does not specify a library.
-  virtual StringRef getDepLibFromLinkerOpt(StringRef LinkerOption) const {
-    return StringRef();
-  }
-
   /// Emit the module flags that the platform cares about.
   virtual void emitModuleFlags(MCStreamer &Streamer,
                                ArrayRef<Module::ModuleFlagEntry> Flags,
@@ -188,6 +182,9 @@ public:
     return nullptr;
   }
 
+  virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV,
+                                        const Mangler &Mang) const {}
+
 protected:
   virtual MCSection *SelectSectionForGlobal(const GlobalValue *GV,
                                             SectionKind Kind, Mangler &Mang,
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 7681575..64a923b 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -273,6 +273,6 @@ public:
                          bool DisableVerify = true) override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index f27411e..d52cb60 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -72,7 +72,7 @@ namespace llvm {
           UseInitArray(false), DisableIntegratedAS(false),
           CompressDebugSections(false), FunctionSections(false),
           DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
-          TrapFuncName(), FloatABIType(FloatABI::Default),
+          FloatABIType(FloatABI::Default),
           AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
           JTType(JumpTable::Single),
           ThreadModel(ThreadModel::POSIX) {}
@@ -172,12 +172,6 @@ namespace llvm {
     /// Emit target-specific trap instruction for 'unreachable' IR instructions.
     unsigned TrapUnreachable : 1;
 
-    /// getTrapFunctionName - If this returns a non-empty string, this means
-    /// isel should lower Intrinsic::trap to a call to the specified function
-    /// name instead of an ISD::TRAP node.
-    std::string TrapFuncName;
-    StringRef getTrapFunctionName() const;
-
     /// FloatABIType - This setting is set by -float-abi=xxx option is specfied
     /// on the command line. This setting may either be Default, Soft, or Hard.
     /// Default selects the target's default behavior. Soft selects the ABI for
@@ -237,7 +231,6 @@ inline bool operator==(const TargetOptions &LHS,
     ARE_EQUAL(PositionIndependentExecutable) &&
     ARE_EQUAL(UseInitArray) &&
     ARE_EQUAL(TrapUnreachable) &&
-    ARE_EQUAL(TrapFuncName) &&
     ARE_EQUAL(FloatABIType) &&
     ARE_EQUAL(AllowFPOpFusion) &&
     ARE_EQUAL(Reciprocals) &&
@@ -252,6 +245,6 @@ inline bool operator!=(const TargetOptions &LHS,
   return !(LHS == RHS);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetRecip.h b/include/llvm/Target/TargetRecip.h
index c3beb40..4cc3672 100644
--- a/include/llvm/Target/TargetRecip.h
+++ b/include/llvm/Target/TargetRecip.h
@@ -68,6 +68,6 @@ private:
   void parseIndividualParams(const std::vector<std::string> &Args);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 1377b38..0ee936a 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -469,6 +469,10 @@ public:
     return nullptr;
   }
 
+  /// Return all the call-preserved register masks defined for this target.
+  virtual ArrayRef<const uint32_t *> getRegMasks() const = 0;
+  virtual ArrayRef<const char *> getRegMaskNames() const = 0;
+
   /// getReservedRegs - Returns a bitset indexed by physical register number
   /// indicating if a register is a special register that has particular uses
   /// and should be considered unavailable at all times, e.g. SP, RA. This is
@@ -998,6 +1002,6 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
   return OS;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 7a788ce..4abbe37 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -341,6 +341,7 @@ def externalsym : SDNode<"ISD::ExternalSymbol",       SDTPtrLeaf, [],
                          "ExternalSymbolSDNode">;
 def texternalsym: SDNode<"ISD::TargetExternalSymbol", SDTPtrLeaf, [],
                          "ExternalSymbolSDNode">;
+def mcsym: SDNode<"ISD::MCSymbol", SDTPtrLeaf, [], "MCSymbolSDNode">;
 def blockaddress : SDNode<"ISD::BlockAddress",        SDTPtrLeaf, [],
                          "BlockAddressSDNode">;
 def tblockaddress: SDNode<"ISD::TargetBlockAddress",  SDTPtrLeaf, [],
diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h
index c3343ca..bacdd95 100644
--- a/include/llvm/Target/TargetSelectionDAGInfo.h
+++ b/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -163,6 +163,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 640e1123..e42c56a 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -178,6 +178,6 @@ public:
   virtual bool enableSubRegLiveness() const { return false; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 59cd921..fbd999c 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -203,6 +203,6 @@ ModulePass *createBarrierNoopPass();
 /// to bitsets.
 ModulePass *createLowerBitSetsPass();
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index 4abb92d..6a644ad 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -86,6 +86,6 @@ private:
   bool shouldInline(CallSite CS);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/InstCombine/InstCombine.h b/include/llvm/Transforms/InstCombine/InstCombine.h
index cfb3156..f48ec13 100644
--- a/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -41,6 +41,6 @@ public:
   PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 4447d0d..250e389 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -136,6 +136,6 @@ FunctionPass *createBoundsCheckingPass();
 /// protect against stack-based overflow vulnerabilities.
 FunctionPass *createSafeStackPass();
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/ObjCARC.h b/include/llvm/Transforms/ObjCARC.h
index 367cdf6..1897adc 100644
--- a/include/llvm/Transforms/ObjCARC.h
+++ b/include/llvm/Transforms/ObjCARC.h
@@ -43,6 +43,6 @@ Pass *createObjCARCContractPass();
 //
 Pass *createObjCARCOptPass();
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 99fff37..4676c95 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -486,6 +486,6 @@ FunctionPass *createNaryReassociatePass();
 //
 FunctionPass *createLoopDistributePass();
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Scalar/EarlyCSE.h b/include/llvm/Transforms/Scalar/EarlyCSE.h
index 5cd4a69..e3dd3c0 100644
--- a/include/llvm/Transforms/Scalar/EarlyCSE.h
+++ b/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -34,6 +34,6 @@ public:
   PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
index ce36742..4028320 100644
--- a/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
+++ b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -35,6 +35,6 @@ public:
   PreservedAnalyses run(Function &F);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Scalar/SimplifyCFG.h b/include/llvm/Transforms/Scalar/SimplifyCFG.h
index d8b638d..ef28e0f 100644
--- a/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -41,6 +41,6 @@ public:
   PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
index 7f6a264..4e4f02c 100644
--- a/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
+++ b/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
@@ -59,6 +59,6 @@ void ComputeASanStackFrameLayout(
     // The result is put here.
     ASanStackFrameLayout *Layout);
 
-} // namespace llvm
+} // llvm namespace
 
 #endif  // LLVM_TRANSFORMS_UTILS_ASANSTACKFRAMELAYOUT_H
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 3004f9e..9b919b6 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -64,14 +64,16 @@ void ReplaceInstWithValue(BasicBlock::InstListType &BIL,
                           BasicBlock::iterator &BI, Value *V);
 
 // ReplaceInstWithInst - Replace the instruction specified by BI with the
-// instruction specified by I.  The original instruction is deleted and BI is
+// instruction specified by I. Copies DebugLoc from BI to I, if I doesn't
+// already have a DebugLoc. The original instruction is deleted and BI is
 // updated to point to the new instruction.
 //
 void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
                          BasicBlock::iterator &BI, Instruction *I);
 
 // ReplaceInstWithInst - Replace the instruction specified by From with the
-// instruction specified by To.
+// instruction specified by To. Copies DebugLoc from BI to I, if I doesn't
+// already have a DebugLoc.
 //
 void ReplaceInstWithInst(Instruction *From, Instruction *To);
 
@@ -308,6 +310,6 @@ void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
 /// entered if the condition is false.
 Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
                       BasicBlock *&IfFalse);
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index 5081229..879f295 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -111,6 +111,6 @@ namespace llvm {
   /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
   Value *EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
                     const DataLayout &DL, const TargetLibraryInfo *TLI);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 9ba6bea..cb187ec 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -233,6 +233,6 @@ bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
 bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                     bool InsertLifetime = true);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index c3c2f3e..3a96d95 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -121,6 +121,6 @@ namespace llvm {
                                     ValueSet &inputs,
                                     ValueSet &outputs);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Utils/CtorUtils.h b/include/llvm/Transforms/Utils/CtorUtils.h
index 1213324..63e564d 100644
--- a/include/llvm/Transforms/Utils/CtorUtils.h
+++ b/include/llvm/Transforms/Utils/CtorUtils.h
@@ -27,6 +27,6 @@ class Module;
 bool optimizeGlobalCtorsList(Module &M,
                              function_ref<bool(Function *)> ShouldRemove);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/GlobalStatus.h b/include/llvm/Transforms/Utils/GlobalStatus.h
index 658449c..c366095 100644
--- a/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -77,6 +77,6 @@ struct GlobalStatus {
 
   GlobalStatus();
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Utils/IntegerDivision.h b/include/llvm/Transforms/Utils/IntegerDivision.h
index 5ba6685..0ec3321 100644
--- a/include/llvm/Transforms/Utils/IntegerDivision.h
+++ b/include/llvm/Transforms/Utils/IntegerDivision.h
@@ -68,6 +68,6 @@ namespace llvm {
   /// @brief Replace Rem with generated code.
   bool expandDivisionUpTo64Bits(BinaryOperator *Div);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 1063f5f..a1bb367a 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -291,6 +291,6 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> Kn
 /// the given edge.  Returns the number of replacements made.
 unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
                                   const BasicBlockEdge &Edge);
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 3aa40cf..15747bc 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -263,6 +263,6 @@ void computeLICMSafetyInfo(LICMSafetyInfo *, Loop *);
 /// variable. Returns true if this is an induction PHI along with the step
 /// value.
 bool isInductionPHI(PHINode *, ScalarEvolution *, ConstantInt *&);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index 120d14a..622265b 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -57,6 +57,6 @@ Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast);
 std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
     Module &M, StringRef CtorName, StringRef InitName,
     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs);
-} // namespace llvm
+} // End llvm namespace
 
 #endif //  LLVM_TRANSFORMS_UTILS_MODULEUTILS_H
diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h
index 6c3d2ea..d0602bf 100644
--- a/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -45,6 +45,6 @@ void PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                      AliasSetTracker *AST = nullptr,
                      AssumptionCache *AC = nullptr);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index 5179d58..1c7b2c58 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -173,6 +173,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index 1b9cb48..ed0841c 100644
--- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -455,6 +455,6 @@ public:
 
 #undef DEBUG_TYPE // "ssaupdater"
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index d7c8338..4115960 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -166,6 +166,6 @@ private:
   /// function by checking for an existing function with name FuncName + f
   bool hasFloatVersion(StringRef FuncName);
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/SymbolRewriter.h b/include/llvm/Transforms/Utils/SymbolRewriter.h
index d798358..5ccee98 100644
--- a/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -108,7 +108,7 @@ private:
                                          yaml::MappingNode *V,
                                          RewriteDescriptorList *DL);
 };
-} // namespace SymbolRewriter
+}
 
 template <>
 struct ilist_traits<SymbolRewriter::RewriteDescriptor>
@@ -147,6 +147,6 @@ public:
 
 ModulePass *createRewriteSymbolsPass();
 ModulePass *createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index b19c6fa..550292f 100644
--- a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -47,6 +47,6 @@ public:
 
 Pass *createUnifyFunctionExitNodesPass();
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index ba58668..7f2cf8d7 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -37,6 +37,6 @@ bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
                              LPPassManager *LPM);
 
 MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
-} // namespace llvm
+}
 
 #endif
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 737ad4f..047ab81 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -96,6 +96,6 @@ namespace llvm {
                                    Materializer));
   }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/Transforms/Utils/VectorUtils.h b/include/llvm/Transforms/Utils/VectorUtils.h
deleted file mode 100644
index 6a35247..0000000
--- a/include/llvm/Transforms/Utils/VectorUtils.h
+++ /dev/null
@@ -1,205 +0,0 @@
-//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines some vectorizer utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
-#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
-
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-
-namespace llvm {
-
-/// \brief Identify if the intrinsic is trivially vectorizable.
-///
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
-static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
-  switch (ID) {
-  case Intrinsic::sqrt:
-  case Intrinsic::sin:
-  case Intrinsic::cos:
-  case Intrinsic::exp:
-  case Intrinsic::exp2:
-  case Intrinsic::log:
-  case Intrinsic::log10:
-  case Intrinsic::log2:
-  case Intrinsic::fabs:
-  case Intrinsic::minnum:
-  case Intrinsic::maxnum:
-  case Intrinsic::copysign:
-  case Intrinsic::floor:
-  case Intrinsic::ceil:
-  case Intrinsic::trunc:
-  case Intrinsic::rint:
-  case Intrinsic::nearbyint:
-  case Intrinsic::round:
-  case Intrinsic::bswap:
-  case Intrinsic::ctpop:
-  case Intrinsic::pow:
-  case Intrinsic::fma:
-  case Intrinsic::fmuladd:
-  case Intrinsic::ctlz:
-  case Intrinsic::cttz:
-  case Intrinsic::powi:
-    return true;
-  default:
-    return false;
-  }
-}
-
-static inline bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
-                                         unsigned ScalarOpdIdx) {
-  switch (ID) {
-    case Intrinsic::ctlz:
-    case Intrinsic::cttz:
-    case Intrinsic::powi:
-      return (ScalarOpdIdx == 1);
-    default:
-      return false;
-  }
-}
-
-static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
-                                              Intrinsic::ID ValidIntrinsicID) {
-  if (I.getNumArgOperands() != 1 ||
-      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
-      I.getType() != I.getArgOperand(0)->getType() ||
-      !I.onlyReadsMemory())
-    return Intrinsic::not_intrinsic;
-
-  return ValidIntrinsicID;
-}
-
-static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
-                                               Intrinsic::ID ValidIntrinsicID) {
-  if (I.getNumArgOperands() != 2 ||
-      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
-      !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
-      I.getType() != I.getArgOperand(0)->getType() ||
-      I.getType() != I.getArgOperand(1)->getType() ||
-      !I.onlyReadsMemory())
-    return Intrinsic::not_intrinsic;
-
-  return ValidIntrinsicID;
-}
-
-static Intrinsic::ID
-getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
-  // If we have an intrinsic call, check if it is trivially vectorizable.
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
-    Intrinsic::ID ID = II->getIntrinsicID();
-    if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
-        ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
-      return ID;
-    else
-      return Intrinsic::not_intrinsic;
-  }
-
-  if (!TLI)
-    return Intrinsic::not_intrinsic;
-
-  LibFunc::Func Func;
-  Function *F = CI->getCalledFunction();
-  // We're going to make assumptions on the semantics of the functions, check
-  // that the target knows that it's available in this environment and it does
-  // not have local linkage.
-  if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
-    return Intrinsic::not_intrinsic;
-
-  // Otherwise check if we have a call to a function that can be turned into a
-  // vector intrinsic.
-  switch (Func) {
-  default:
-    break;
-  case LibFunc::sin:
-  case LibFunc::sinf:
-  case LibFunc::sinl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::sin);
-  case LibFunc::cos:
-  case LibFunc::cosf:
-  case LibFunc::cosl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::cos);
-  case LibFunc::exp:
-  case LibFunc::expf:
-  case LibFunc::expl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::exp);
-  case LibFunc::exp2:
-  case LibFunc::exp2f:
-  case LibFunc::exp2l:
-    return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
-  case LibFunc::log:
-  case LibFunc::logf:
-  case LibFunc::logl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::log);
-  case LibFunc::log10:
-  case LibFunc::log10f:
-  case LibFunc::log10l:
-    return checkUnaryFloatSignature(*CI, Intrinsic::log10);
-  case LibFunc::log2:
-  case LibFunc::log2f:
-  case LibFunc::log2l:
-    return checkUnaryFloatSignature(*CI, Intrinsic::log2);
-  case LibFunc::fabs:
-  case LibFunc::fabsf:
-  case LibFunc::fabsl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
-  case LibFunc::fmin:
-  case LibFunc::fminf:
-  case LibFunc::fminl:
-    return checkBinaryFloatSignature(*CI, Intrinsic::minnum);
-  case LibFunc::fmax:
-  case LibFunc::fmaxf:
-  case LibFunc::fmaxl:
-    return checkBinaryFloatSignature(*CI, Intrinsic::maxnum);
-  case LibFunc::copysign:
-  case LibFunc::copysignf:
-  case LibFunc::copysignl:
-    return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
-  case LibFunc::floor:
-  case LibFunc::floorf:
-  case LibFunc::floorl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::floor);
-  case LibFunc::ceil:
-  case LibFunc::ceilf:
-  case LibFunc::ceill:
-    return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
-  case LibFunc::trunc:
-  case LibFunc::truncf:
-  case LibFunc::truncl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
-  case LibFunc::rint:
-  case LibFunc::rintf:
-  case LibFunc::rintl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::rint);
-  case LibFunc::nearbyint:
-  case LibFunc::nearbyintf:
-  case LibFunc::nearbyintl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
-  case LibFunc::round:
-  case LibFunc::roundf:
-  case LibFunc::roundl:
-    return checkUnaryFloatSignature(*CI, Intrinsic::round);
-  case LibFunc::pow:
-  case LibFunc::powf:
-  case LibFunc::powl:
-    return checkBinaryFloatSignature(*CI, Intrinsic::pow);
-  }
-
-  return Intrinsic::not_intrinsic;
-}
-
-} // namespace llvm
-
-#endif
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index aab2790..aec3993 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -139,6 +139,6 @@ Pass *createSLPVectorizerPass();
 bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
                          const VectorizeConfig &C = VectorizeConfig());
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index a9e6daf..dcc5ce1 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -124,6 +124,7 @@ module LLVM_IR {
   textual header "IR/DebugInfoFlags.def"
   textual header "IR/Instruction.def"
   textual header "IR/Metadata.def"
+  textual header "IR/Value.def"
 }
 
 module LLVM_IRReader { requires cplusplus umbrella "IRReader" module * { export * } }
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index d44653e..ad0727a 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -48,8 +48,8 @@ char AliasAnalysis::ID = 0;
 // Default chaining methods
 //===----------------------------------------------------------------------===//
 
-AliasAnalysis::AliasResult AliasAnalysis::alias(const MemoryLocation &LocA,
-                                                const MemoryLocation &LocB) {
+AliasResult AliasAnalysis::alias(const MemoryLocation &LocA,
+                                 const MemoryLocation &LocB) {
   assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
   return AA->alias(LocA, LocB);
 }
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 0112186..9b6a5a4 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -115,7 +115,7 @@ namespace {
       return AliasAnalysis::getModRefInfo(CS1,CS2);
     }
   };
-} // namespace
+}
 
 char AliasAnalysisCounter::ID = 0;
 INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
@@ -125,9 +125,8 @@ ModulePass *llvm::createAliasAnalysisCounterPass() {
   return new AliasAnalysisCounter();
 }
 
-AliasAnalysis::AliasResult
-AliasAnalysisCounter::alias(const MemoryLocation &LocA,
-                            const MemoryLocation &LocB) {
+AliasResult AliasAnalysisCounter::alias(const MemoryLocation &LocA,
+                                        const MemoryLocation &LocB) {
   AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
 
   const char *AliasString = nullptr;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 1501b5f..5d1b001 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -76,7 +76,7 @@ namespace {
     bool runOnFunction(Function &F) override;
     bool doFinalization(Module &M) override;
   };
-} // namespace
+}
 
 char AAEval::ID = 0;
 INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
@@ -196,20 +196,20 @@ bool AAEval::runOnFunction(Function &F) {
       if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
 
       switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
-      case AliasAnalysis::NoAlias:
+      case NoAlias:
         PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
         ++NoAliasCount;
         break;
-      case AliasAnalysis::MayAlias:
+      case MayAlias:
         PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
         ++MayAliasCount;
         break;
-      case AliasAnalysis::PartialAlias:
+      case PartialAlias:
         PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
                      F.getParent());
         ++PartialAliasCount;
         break;
-      case AliasAnalysis::MustAlias:
+      case MustAlias:
         PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
         ++MustAliasCount;
         break;
@@ -225,22 +225,22 @@ bool AAEval::runOnFunction(Function &F) {
            I2 != E2; ++I2) {
         switch (AA.alias(MemoryLocation::get(cast<LoadInst>(*I1)),
                          MemoryLocation::get(cast<StoreInst>(*I2)))) {
-        case AliasAnalysis::NoAlias:
+        case NoAlias:
           PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
                                 F.getParent());
           ++NoAliasCount;
           break;
-        case AliasAnalysis::MayAlias:
+        case MayAlias:
           PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
                                 F.getParent());
           ++MayAliasCount;
           break;
-        case AliasAnalysis::PartialAlias:
+        case PartialAlias:
           PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
                                 F.getParent());
           ++PartialAliasCount;
           break;
-        case AliasAnalysis::MustAlias:
+        case MustAlias:
           PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
                                 F.getParent());
           ++MustAliasCount;
@@ -255,22 +255,22 @@ bool AAEval::runOnFunction(Function &F) {
       for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) {
         switch (AA.alias(MemoryLocation::get(cast<StoreInst>(*I1)),
                          MemoryLocation::get(cast<StoreInst>(*I2)))) {
-        case AliasAnalysis::NoAlias:
+        case NoAlias:
           PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2,
                                 F.getParent());
           ++NoAliasCount;
           break;
-        case AliasAnalysis::MayAlias:
+        case MayAlias:
           PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2,
                                 F.getParent());
           ++MayAliasCount;
           break;
-        case AliasAnalysis::PartialAlias:
+        case PartialAlias:
           PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2,
                                 F.getParent());
           ++PartialAliasCount;
           break;
-        case AliasAnalysis::MustAlias:
+        case MustAlias:
           PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2,
                                 F.getParent());
           ++MustAliasCount;
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index fde0eeb..1ef49fc 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -130,7 +130,7 @@ namespace {
     }
 
   };
-} // namespace
+}
 
 char AliasDebugger::ID = 0;
 INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index f7a803c..bf8cda1 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -32,11 +32,11 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
   assert(!Forward && "This set is a forwarding set!!");
 
   // Update the alias and access types of this set...
-  AccessTy |= AS.AccessTy;
-  AliasTy  |= AS.AliasTy;
+  Access |= AS.Access;
+  Alias  |= AS.Alias;
   Volatile |= AS.Volatile;
 
-  if (AliasTy == MustAlias) {
+  if (Alias == SetMustAlias) {
     // Check that these two merged sets really are must aliases.  Since both
     // used to be must-alias sets, we can just check any pointer from each set
     // for aliasing.
@@ -47,8 +47,8 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
     // If the pointers are not a must-alias pair, this set becomes a may alias.
     if (AA.alias(MemoryLocation(L->getValue(), L->getSize(), L->getAAInfo()),
                  MemoryLocation(R->getValue(), R->getSize(), R->getAAInfo())) !=
-        AliasAnalysis::MustAlias)
-      AliasTy = MayAlias;
+        MustAlias)
+      Alias = SetMayAlias;
   }
 
   bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
@@ -101,14 +101,14 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
   if (isMustAlias() && !KnownMustAlias)
     if (PointerRec *P = getSomePointer()) {
       AliasAnalysis &AA = AST.getAliasAnalysis();
-      AliasAnalysis::AliasResult Result =
+      AliasResult Result =
           AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
                    MemoryLocation(Entry.getValue(), Size, AAInfo));
-      if (Result != AliasAnalysis::MustAlias)
-        AliasTy = MayAlias;
+      if (Result != MustAlias)
+        Alias = SetMayAlias;
       else                  // First entry of must alias must have maximum size!
         P->updateSizeAndAAInfo(Size, AAInfo);
-      assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
+      assert(Result != NoAlias && "Cannot be part of must set!");
     }
 
   Entry.setAliasSet(this);
@@ -128,14 +128,14 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
   UnknownInsts.emplace_back(I);
 
   if (!I->mayWriteToMemory()) {
-    AliasTy = MayAlias;
-    AccessTy |= Refs;
+    Alias = SetMayAlias;
+    Access |= RefAccess;
     return;
   }
 
   // FIXME: This should use mod/ref information to make this not suck so bad
-  AliasTy = MayAlias;
-  AccessTy = ModRef;
+  Alias = SetMayAlias;
+  Access = ModRefAccess;
 }
 
 /// aliasesPointer - Return true if the specified pointer "may" (or must)
@@ -144,7 +144,7 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
 bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
                               const AAMDNodes &AAInfo,
                               AliasAnalysis &AA) const {
-  if (AliasTy == MustAlias) {
+  if (Alias == SetMustAlias) {
     assert(UnknownInsts.empty() && "Illegal must alias set!");
 
     // If this is a set of MustAliases, only check to see if the pointer aliases
@@ -296,7 +296,7 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
 
 bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
   bool NewPtr;
-  addPointer(Ptr, Size, AAInfo, AliasSet::NoModRef, NewPtr);
+  addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess, NewPtr);
   return NewPtr;
 }
 
@@ -307,11 +307,11 @@ bool AliasSetTracker::add(LoadInst *LI) {
   AAMDNodes AAInfo;
   LI->getAAMetadata(AAInfo);
 
-  AliasSet::AccessType ATy = AliasSet::Refs;
+  AliasSet::AccessLattice Access = AliasSet::RefAccess;
   bool NewPtr;
   AliasSet &AS = addPointer(LI->getOperand(0),
                             AA.getTypeStoreSize(LI->getType()),
-                            AAInfo, ATy, NewPtr);
+                            AAInfo, Access, NewPtr);
   if (LI->isVolatile()) AS.setVolatile();
   return NewPtr;
 }
@@ -322,12 +322,12 @@ bool AliasSetTracker::add(StoreInst *SI) {
   AAMDNodes AAInfo;
   SI->getAAMetadata(AAInfo);
 
-  AliasSet::AccessType ATy = AliasSet::Mods;
+  AliasSet::AccessLattice Access = AliasSet::ModAccess;
   bool NewPtr;
   Value *Val = SI->getOperand(0);
   AliasSet &AS = addPointer(SI->getOperand(1),
                             AA.getTypeStoreSize(Val->getType()),
-                            AAInfo, ATy, NewPtr);
+                            AAInfo, Access, NewPtr);
   if (SI->isVolatile()) AS.setVolatile();
   return NewPtr;
 }
@@ -338,7 +338,7 @@ bool AliasSetTracker::add(VAArgInst *VAAI) {
 
   bool NewPtr;
   addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
-             AliasSet::ModRef, NewPtr);
+             AliasSet::ModRefAccess, NewPtr);
   return NewPtr;
 }
 
@@ -397,7 +397,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
     for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
       AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
                                    ASI.getAAInfo(),
-                                   (AliasSet::AccessType)AS.AccessTy, X);
+                                   (AliasSet::AccessLattice)AS.Access, X);
       if (AS.isVolatile()) NewAS.setVolatile();
     }
   }
@@ -572,13 +572,13 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
 
 void AliasSet::print(raw_ostream &OS) const {
   OS << "  AliasSet[" << (const void*)this << ", " << RefCount << "] ";
-  OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
-  switch (AccessTy) {
-  case NoModRef: OS << "No access "; break;
-  case Refs    : OS << "Ref       "; break;
-  case Mods    : OS << "Mod       "; break;
-  case ModRef  : OS << "Mod/Ref   "; break;
-  default: llvm_unreachable("Bad value for AccessTy!");
+  OS << (Alias == SetMustAlias ? "must" : "may") << " alias, ";
+  switch (Access) {
+  case NoAccess:     OS << "No access "; break;
+  case RefAccess:    OS << "Ref       "; break;
+  case ModAccess:    OS << "Mod       "; break;
+  case ModRefAccess: OS << "Mod/Ref   "; break;
+  default: llvm_unreachable("Bad value for Access!");
   }
   if (isVolatile()) OS << "[volatile] ";
   if (Forward)
@@ -666,7 +666,7 @@ namespace {
       return false;
     }
   };
-} // namespace
+}
 
 char AliasSetPrinter::ID = 0;
 INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index d11a748..8e81225 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -182,7 +182,7 @@ namespace {
       return !operator==(Other);
     }
   };
-} // namespace
+}
 
 
 /// GetLinearExpression - Analyze the specified value as a linear expression:
@@ -838,10 +838,11 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
 
 /// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
 /// operators, both having the exact same pointer operand.
-static AliasAnalysis::AliasResult
-aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
-                         const GEPOperator *GEP2, uint64_t V2Size,
-                         const DataLayout &DL) {
+static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
+                                            uint64_t V1Size,
+                                            const GEPOperator *GEP2,
+                                            uint64_t V2Size,
+                                            const DataLayout &DL) {
 
   assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
          "Expected GEPs with the same pointer operand");
@@ -851,13 +852,13 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
   // We also need at least two indices (the pointer, and the struct field).
   if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
       GEP1->getNumIndices() < 2)
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
   // If we don't know the size of the accesses through both GEPs, we can't
   // determine whether the struct fields accessed can't alias.
   if (V1Size == MemoryLocation::UnknownSize ||
       V2Size == MemoryLocation::UnknownSize)
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
   ConstantInt *C1 =
       dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
@@ -868,7 +869,7 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
   // If they're identical, the other indices might be also be dynamically
   // equal, so the GEPs can alias.
   if (!C1 || !C2 || C1 == C2)
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
   // Find the last-indexed type of the GEP, i.e., the type you'd get if
   // you stripped the last index.
@@ -886,7 +887,7 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
   for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
     if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
             GEP1->getSourceElementType(), IntermediateIndices)))
-      return AliasAnalysis::MayAlias;
+      return MayAlias;
     IntermediateIndices.push_back(GEP1->getOperand(i + 1));
   }
 
@@ -895,7 +896,7 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
           GEP1->getSourceElementType(), IntermediateIndices));
 
   if (!LastIndexedStruct)
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
   // We know that:
   // - both GEPs begin indexing from the exact same pointer;
@@ -924,9 +925,9 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
 
   if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
       EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
-    return AliasAnalysis::NoAlias;
+    return NoAlias;
 
-  return AliasAnalysis::MayAlias;
+  return MayAlias;
 }
 
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
@@ -934,13 +935,10 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
 /// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
 /// UnderlyingV2 is the same for V2.
 ///
-AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
-                             const AAMDNodes &V1AAInfo,
-                             const Value *V2, uint64_t V2Size,
-                             const AAMDNodes &V2AAInfo,
-                             const Value *UnderlyingV1,
-                             const Value *UnderlyingV2) {
+AliasResult BasicAliasAnalysis::aliasGEP(
+    const GEPOperator *GEP1, uint64_t V1Size, const AAMDNodes &V1AAInfo,
+    const Value *V2, uint64_t V2Size, const AAMDNodes &V2AAInfo,
+    const Value *UnderlyingV1, const Value *UnderlyingV2) {
   int64_t GEP1BaseOffset;
   bool GEP1MaxLookupReached;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
@@ -1196,26 +1194,25 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
   return PartialAlias;
 }
 
-static AliasAnalysis::AliasResult
-MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
+static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
   // If the results agree, take it.
   if (A == B)
     return A;
   // A mix of PartialAlias and MustAlias is PartialAlias.
-  if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) ||
-      (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias))
-    return AliasAnalysis::PartialAlias;
+  if ((A == PartialAlias && B == MustAlias) ||
+      (B == PartialAlias && A == MustAlias))
+    return PartialAlias;
   // Otherwise, we don't know anything.
-  return AliasAnalysis::MayAlias;
+  return MayAlias;
 }
 
 /// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
 /// instruction against another.
-AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
-                                const AAMDNodes &SIAAInfo,
-                                const Value *V2, uint64_t V2Size,
-                                const AAMDNodes &V2AAInfo) {
+AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI,
+                                            uint64_t SISize,
+                                            const AAMDNodes &SIAAInfo,
+                                            const Value *V2, uint64_t V2Size,
+                                            const AAMDNodes &V2AAInfo) {
   // If the values are Selects with the same condition, we can do a more precise
   // check: just check for aliases between the values on corresponding arms.
   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -1245,11 +1242,10 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
 
 // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
 // against another.
-AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
-                             const AAMDNodes &PNAAInfo,
-                             const Value *V2, uint64_t V2Size,
-                             const AAMDNodes &V2AAInfo) {
+AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
+                                         const AAMDNodes &PNAAInfo,
+                                         const Value *V2, uint64_t V2Size,
+                                         const AAMDNodes &V2AAInfo) {
   // Track phi nodes we have visited. We use this information when we determine
   // value equivalence.
   VisitedPhiBBs.insert(PN->getParent());
@@ -1331,11 +1327,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
 // aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
 // such as array references.
 //
-AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
-                               AAMDNodes V1AAInfo,
-                               const Value *V2, uint64_t V2Size,
-                               AAMDNodes V2AAInfo) {
+AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
+                                           AAMDNodes V1AAInfo, const Value *V2,
+                                           uint64_t V2Size,
+                                           AAMDNodes V2AAInfo) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are.
   if (V1Size == 0 || V2Size == 0)
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index daa77b8..6ceda06 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -598,7 +598,7 @@ template <> struct GraphTraits<IrreducibleGraph> {
   static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
   static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
 };
-} // namespace llvm
+}
 
 /// \brief Find extra irreducible headers.
 ///
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 8ecd70b..e15109b 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -126,10 +126,9 @@ static bool loopContainsBoth(const LoopInfo *LI,
   return L1 != nullptr && L1 == L2;
 }
 
-static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
-                                        BasicBlock *StopBB,
-                                        const DominatorTree *DT,
-                                        const LoopInfo *LI) {
+bool llvm::isPotentiallyReachableFromMany(
+    SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
+    const DominatorTree *DT, const LoopInfo *LI) {
   // When the stop block is unreachable, it's dominated from everywhere,
   // regardless of whether there's a path between the two blocks.
   if (DT && !DT->isReachableFromEntry(StopBB))
@@ -179,8 +178,8 @@ bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
   SmallVector<BasicBlock*, 32> Worklist;
   Worklist.push_back(const_cast<BasicBlock*>(A));
 
-  return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B),
-                                     DT, LI);
+  return isPotentiallyReachableFromMany(Worklist, const_cast<BasicBlock *>(B),
+                                        DT, LI);
 }
 
 bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
@@ -230,7 +229,6 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
   if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
     return false;
 
-  return isPotentiallyReachableInner(Worklist,
-                                     const_cast<BasicBlock*>(B->getParent()),
-                                     DT, LI);
+  return isPotentiallyReachableFromMany(
+      Worklist, const_cast<BasicBlock *>(B->getParent()), DT, LI);
 }
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index edd02c2..c86f1f5 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -40,7 +40,7 @@ namespace {
       AU.setPreservesAll();
     }
   };
-} // namespace
+}
 
 char CFGViewer::ID = 0;
 INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
@@ -63,7 +63,7 @@ namespace {
       AU.setPreservesAll();
     }
   };
-} // namespace
+}
 
 char CFGOnlyViewer::ID = 0;
 INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
@@ -97,7 +97,7 @@ namespace {
       AU.setPreservesAll();
     }
   };
-} // namespace
+}
 
 char CFGPrinter::ID = 0;
 INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", 
@@ -130,7 +130,7 @@ namespace {
       AU.setPreservesAll();
     }
   };
-} // namespace
+}
 
 char CFGOnlyPrinter::ID = 0;
 INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
index d937c0b..fe1c088 100644
--- a/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -725,7 +725,7 @@ public:
 
 typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
 typedef DenseMap<Value *, GraphT::Node> NodeMapT;
-} // namespace
+}
 
 // -- Setting up/registering CFLAA pass -- //
 char CFLAliasAnalysis::ID = 0;
@@ -1109,8 +1109,8 @@ void CFLAliasAnalysis::scan(Function *Fn) {
   Handles.push_front(FunctionHandle(Fn, this));
 }
 
-AliasAnalysis::AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
-                                                   const MemoryLocation &LocB) {
+AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
+                                    const MemoryLocation &LocB) {
   auto *ValA = const_cast<Value *>(LocA.Ptr);
   auto *ValB = const_cast<Value *>(LocB.Ptr);
 
@@ -1121,7 +1121,7 @@ AliasAnalysis::AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
     // The only times this is known to happen are when globals + InlineAsm
     // are involved
     DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
   }
 
   if (MaybeFnA.hasValue()) {
@@ -1139,11 +1139,11 @@ AliasAnalysis::AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
   auto &Sets = MaybeInfo->Sets;
   auto MaybeA = Sets.find(ValA);
   if (!MaybeA.hasValue())
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
   auto MaybeB = Sets.find(ValB);
   if (!MaybeB.hasValue())
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
   auto SetA = *MaybeA;
   auto SetB = *MaybeB;
@@ -1160,7 +1160,7 @@ AliasAnalysis::AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
   // the sets has no values that could legally be altered by changing the value
   // of an argument or global, then we don't have to be as conservative.
   if (AttrsA.any() && AttrsB.any())
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
   // We currently unify things even if the accesses to them may not be in
   // bounds, so we can't return partial alias here because we don't
@@ -1171,9 +1171,9 @@ AliasAnalysis::AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,
   // differentiate
 
   if (SetA.Index == SetB.Index)
-    return AliasAnalysis::MayAlias;
+    return MayAlias;
 
-  return AliasAnalysis::NoAlias;
+  return NoAlias;
 }
 
 bool CFLAliasAnalysis::doInitialization(Module &M) {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index b22ee7e..3ec79ad 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -62,6 +62,7 @@ add_llvm_library(LLVMAnalysis
   TypeBasedAliasAnalysis.cpp
   ScopedNoAliasAA.cpp
   ValueTracking.cpp
+  VectorUtils.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 92f6932..52ef807 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -52,34 +52,136 @@ namespace {
     bool Captured;
   };
 
+  struct NumberedInstCache {
+    SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts;
+    BasicBlock::const_iterator LastInstFound;
+    unsigned LastInstPos;
+    const BasicBlock *BB;
+
+    NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) {
+      LastInstFound = BB->end();
+    }
+
+    /// \brief Find the first instruction 'A' or 'B' in 'BB'. Number out
+    /// instruction while walking 'BB'.
+    const Instruction *find(const Instruction *A, const Instruction *B) {
+      const Instruction *Inst = nullptr;
+      assert(!(LastInstFound == BB->end() && LastInstPos != 0) &&
+             "Instruction supposed to be in NumberedInsts");
+
+      // Start the search with the instruction found in the last lookup round.
+      auto II = BB->begin();
+      auto IE = BB->end();
+      if (LastInstFound != IE)
+        II = std::next(LastInstFound);
+
+      // Number all instructions up to the point where we find 'A' or 'B'.
+      for (++LastInstPos; II != IE; ++II, ++LastInstPos) {
+        Inst = cast<Instruction>(II);
+        NumberedInsts[Inst] = LastInstPos;
+        if (Inst == A || Inst == B)
+          break;
+      }
+
+      assert(II != IE && "Instruction not found?");
+      LastInstFound = II;
+      return Inst;
+    }
+
+    /// \brief Find out whether 'A' dominates 'B', meaning whether 'A'
+    /// comes before 'B' in 'BB'. This is a simplification that considers
+    /// cached instruction positions and ignores other basic blocks, being
+    /// only relevant to compare relative instructions positions inside 'BB'.
+    bool dominates(const Instruction *A, const Instruction *B) {
+      assert(A->getParent() == B->getParent() &&
+             "Instructions must be in the same basic block!");
+
+      unsigned NA = NumberedInsts.lookup(A);
+      unsigned NB = NumberedInsts.lookup(B);
+      if (NA && NB)
+        return NA < NB;
+      if (NA)
+        return true;
+      if (NB)
+        return false;
+
+      return A == find(A, B);
+    }
+  };
+
   /// Only find pointer captures which happen before the given instruction. Uses
   /// the dominator tree to determine whether one instruction is before another.
   /// Only support the case where the Value is defined in the same basic block
   /// as the given instruction and the use.
   struct CapturesBefore : public CaptureTracker {
+
     CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT,
                    bool IncludeI)
-      : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
-        IncludeI(IncludeI), Captured(false) {}
+      : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT),
+        ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}
 
     void tooManyUses() override { Captured = true; }
 
-    bool shouldExplore(const Use *U) override {
-      Instruction *I = cast<Instruction>(U->getUser());
-      if (BeforeHere == I && !IncludeI)
-        return false;
-
+    bool isSafeToPrune(Instruction *I) {
       BasicBlock *BB = I->getParent();
       // We explore this usage only if the usage can reach "BeforeHere".
       // If use is not reachable from entry, there is no need to explore.
       if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+        return true;
+
+      // Compute the case where both instructions are inside the same basic
+      // block. Since instructions in the same BB as BeforeHere are numbered in
+      // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable'
+      // which are very expensive for large basic blocks.
+      if (BB == BeforeHere->getParent()) {
+        // 'I' dominates 'BeforeHere' => not safe to prune.
+        //
+        // The value defined by an invoke dominates an instruction only if it
+        // dominates every instruction in UseBB. A PHI is dominated only if
+        // the instruction dominates every possible use in the UseBB. Since
+        // UseBB == BB, avoid pruning.
+        if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere)
+          return false;
+        if (!LocalInstCache.dominates(BeforeHere, I))
+          return false;
+
+        // 'BeforeHere' comes before 'I', it's safe to prune if we also
+        // guarantee that 'I' never reaches 'BeforeHere' through a back-edge or
+        // by its successors, i.e, prune if:
+        //
+        //  (1) BB is an entry block or have no sucessors.
+        //  (2) There's no path coming back through BB sucessors.
+        if (BB == &BB->getParent()->getEntryBlock() ||
+            !BB->getTerminator()->getNumSuccessors())
+          return true;
+
+        SmallVector<BasicBlock*, 32> Worklist;
+        Worklist.append(succ_begin(BB), succ_end(BB));
+        if (!isPotentiallyReachableFromMany(Worklist, BB, DT))
+          return true;
+
         return false;
+      }
+
       // If the value is defined in the same basic block as use and BeforeHere,
       // there is no need to explore the use if BeforeHere dominates use.
       // Check whether there is a path from I to BeforeHere.
       if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
           !isPotentiallyReachable(I, BeforeHere, DT))
+        return true;
+
+      return false;
+    }
+
+    bool shouldExplore(const Use *U) override {
+      Instruction *I = cast<Instruction>(U->getUser());
+
+      if (BeforeHere == I && !IncludeI)
         return false;
+
+      if (isSafeToPrune(I))
+        return false;
+
       return true;
     }
 
@@ -87,21 +189,14 @@ namespace {
       if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
         return false;
 
-      Instruction *I = cast<Instruction>(U->getUser());
-      if (BeforeHere == I && !IncludeI)
+      if (!shouldExplore(U))
         return false;
 
-      BasicBlock *BB = I->getParent();
-      // Same logic as in shouldExplore.
-      if (BeforeHere != I && !DT->isReachableFromEntry(BB))
-        return false;
-      if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
-          !isPotentiallyReachable(I, BeforeHere, DT))
-        return false;
       Captured = true;
       return true;
     }
 
+    NumberedInstCache LocalInstCache;
     const Instruction *BeforeHere;
     DominatorTree *DT;
 
@@ -110,7 +205,7 @@ namespace {
 
     bool Captured;
   };
-} // namespace
+}
 
 /// PointerMayBeCaptured - Return true if this pointer value may be captured
 /// by the enclosing function (which is required to exist).  This routine can
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index d603b7b..9d15786 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -115,7 +115,7 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
       O << "AddRec: " << *AR << "\n";
 
       SmallVector<const SCEV *, 3> Subscripts, Sizes;
-      AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst));
+      SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst));
       if (Subscripts.size() == 0 || Sizes.size() == 0 ||
           Subscripts.size() != Sizes.size()) {
         O << "failed to delinearize\n";
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index b16cdfe..4826ac4 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -625,10 +625,9 @@ void Dependence::dump(raw_ostream &OS) const {
   OS << "!\n";
 }
 
-static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
-                                                         const DataLayout &DL,
-                                                         const Value *A,
-                                                         const Value *B) {
+static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+                                          const DataLayout &DL, const Value *A,
+                                          const Value *B) {
   const Value *AObj = GetUnderlyingObject(A, DL);
   const Value *BObj = GetUnderlyingObject(B, DL);
   return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
@@ -3267,8 +3266,8 @@ bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
 
   // First step: collect parametric terms in both array references.
   SmallVector<const SCEV *, 4> Terms;
-  SrcAR->collectParametricTerms(*SE, Terms);
-  DstAR->collectParametricTerms(*SE, Terms);
+  SE->collectParametricTerms(SrcAR, Terms);
+  SE->collectParametricTerms(DstAR, Terms);
 
   // Second step: find subscript sizes.
   SmallVector<const SCEV *, 4> Sizes;
@@ -3276,8 +3275,8 @@ bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
 
   // Third step: compute the access functions for each subscript.
   SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
-  SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes);
-  DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes);
+  SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
+  SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
 
   // Fail when there is only a subscript: that's a linearized access function.
   if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
@@ -3365,16 +3364,16 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
 
   switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
                                  SrcPtr)) {
-  case AliasAnalysis::MayAlias:
-  case AliasAnalysis::PartialAlias:
+  case MayAlias:
+  case PartialAlias:
     // cannot analyse objects if we don't understand their aliasing.
     DEBUG(dbgs() << "can't analyze may or partial alias\n");
     return make_unique<Dependence>(Src, Dst);
-  case AliasAnalysis::NoAlias:
+  case NoAlias:
     // If the objects noalias, they are distinct, accesses are independent.
     DEBUG(dbgs() << "no alias\n");
     return nullptr;
-  case AliasAnalysis::MustAlias:
+  case MustAlias:
     break; // The underlying objects alias; test accesses for dependence.
   }
 
@@ -3814,7 +3813,7 @@ const  SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
   Value *SrcPtr = getPointerOperand(Src);
   Value *DstPtr = getPointerOperand(Dst);
   assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
-                                SrcPtr) == AliasAnalysis::MustAlias);
+                                SrcPtr) == MustAlias);
 
   // establish loop nesting levels
   establishNestingLevels(Src, Dst);
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index 3765adf..e5ee295 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -284,7 +284,7 @@ void DivergencePropagator::propagate() {
   }
 }
 
-} // namespace
+} /// end namespace anonymous
 
 FunctionPass *llvm::createDivergenceAnalysisPass() {
   return new DivergenceAnalysis();
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index 0e0d174..0c880df 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -78,7 +78,7 @@ struct DOTGraphTraits<PostDominatorTree*>
     return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
   }
 };
-} // namespace llvm
+}
 
 namespace {
 struct DominatorTreeWrapperPassAnalysisGraphTraits {
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 6b3e063..07b389a 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -451,7 +451,7 @@ bool CGPassManager::runOnModule(Module &M) {
     const std::vector<CallGraphNode *> &NodeVec = *CGI;
     CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size());
     ++CGI;
-    
+
     // At the top level, we run all the passes in this pass manager on the
     // functions in this SCC.  However, we support iterative compilation in the
     // case where a function pass devirtualizes a call to a function.  For
diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp
index f183625..68dcd3c 100644
--- a/lib/Analysis/IPA/CallPrinter.cpp
+++ b/lib/Analysis/IPA/CallPrinter.cpp
@@ -41,7 +41,7 @@ struct AnalysisCallGraphWrapperPassTraits {
   }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 namespace {
 
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index a32631d..f1ddde2 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -189,7 +189,7 @@ namespace {
                               GlobalValue *OkayStoreDest = nullptr);
     bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
   };
-} // namespace
+}
 
 char GlobalsModRef::ID = 0;
 INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
@@ -479,8 +479,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
 /// alias - If one of the pointers is to a global that we are tracking, and the
 /// other is some random pointer, we know there cannot be an alias, because the
 /// address of the global isn't taken.
-AliasAnalysis::AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
-                                                const MemoryLocation &LocB) {
+AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
+                                 const MemoryLocation &LocB) {
   // Get the base object these pointers point to.
   const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
   const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 2bd959d..349b9ca 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -54,6 +54,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   // The called function.
   Function &F;
 
+  // The candidate callsite being analyzed. Please do not use this to do
+  // analysis in the caller function; we want the inline cost query to be
+  // easily cacheable. Instead, use the cover function paramHasAttr.
+  CallSite CandidateCS;
+
   int Threshold;
   int Cost;
 
@@ -106,6 +111,17 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool simplifyCallSite(Function *F, CallSite CS);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 
+  /// Return true if the given argument to the function being considered for
+  /// inlining has the given attribute set either at the call site or the
+  /// function declaration.  Primarily used to inspect call site specific
+  /// attributes since these can be more precise than the ones on the callee
+  /// itself. 
+  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
+  
+  /// Return true if the given value is known non null within the callee if
+  /// inlined through this particular callsite. 
+  bool isKnownNonNullInCallee(Value *V);
+
   // Custom analysis routines.
   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
 
@@ -144,9 +160,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 
 public:
   CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
-               Function &Callee, int Threshold)
-      : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
-        IsCallerRecursive(false), IsRecursiveCall(false),
+               Function &Callee, int Threshold, CallSite CSArg)
+    : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
+        Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
         ExposesReturnsTwice(false), HasDynamicAlloca(false),
         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
@@ -496,6 +512,33 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
   return false;
 }
 
+bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
+  unsigned ArgNo = A->getArgNo();
+  return CandidateCS.paramHasAttr(ArgNo+1, Attr);
+}
+
+bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
+  // Does the *call site* have the NonNull attribute set on an argument?  We
+  // use the attribute on the call site to memoize any analysis done in the
+  // caller. This will also trip if the callee function has a non-null
+  // parameter attribute, but that's a less interesting case because hopefully
+  // the callee would already have been simplified based on that.
+  if (Argument *A = dyn_cast<Argument>(V))
+    if (paramHasAttr(A, Attribute::NonNull))
+      return true;
+  
+  // Is this an alloca in the caller?  This is distinct from the attribute case
+  // above because attributes aren't updated within the inliner itself and we
+  // always want to catch the alloca derived case.
+  if (isAllocaDerivedArg(V))
+    // We can actually predict the result of comparisons between an
+    // alloca-derived value and null. Note that this fires regardless of
+    // SROA firing.
+    return true;
+  
+  return false;
+}
+
 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   // First try to handle simplified comparisons.
@@ -537,18 +580,14 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) {
   }
 
   // If the comparison is an equality comparison with null, we can simplify it
-  // for any alloca-derived argument.
-  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
-    if (isAllocaDerivedArg(I.getOperand(0))) {
-      // We can actually predict the result of comparisons between an
-      // alloca-derived value and null. Note that this fires regardless of
-      // SROA firing.
-      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
-      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
-                                        : ConstantInt::getFalse(I.getType());
-      return true;
-    }
-
+  // if we know the value (argument) can't be null
+  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
+      isKnownNonNullInCallee(I.getOperand(0))) {
+    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+                                      : ConstantInt::getFalse(I.getType());
+    return true;
+  }
   // Finally check for SROA candidates in comparisons.
   Value *SROAArg;
   DenseMap<Value *, int>::iterator CostIt;
@@ -790,7 +829,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
   // during devirtualization and so we want to give it a hefty bonus for
   // inlining, but cap that bonus in the event that inlining wouldn't pan
   // out. Pretend to inline the function, with a custom threshold.
-  CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
+  CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
   if (CA.analyzeCall(CS)) {
     // We were able to inline the indirect call! Subtract the cost from the
     // bonus we want to apply, but don't go below zero.
@@ -1305,9 +1344,9 @@ static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
 /// \brief Test that there are no attribute conflicts between Caller and Callee
 ///        that prevent inlining.
 static bool functionsHaveCompatibleAttributes(Function *Caller,
-                                              Function *Callee) {
-  return attributeMatches(Caller, Callee, "target-cpu") &&
-         attributeMatches(Caller, Callee, "target-features") &&
+                                              Function *Callee,
+                                              TargetTransformInfo &TTI) {
+  return TTI.hasCompatibleFunctionAttributes(Caller, Callee) &&
          attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
          attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
          attributeMatches(Caller, Callee, Attribute::SanitizeThread);
@@ -1329,7 +1368,8 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
 
   // Never inline functions with conflicting attributes (unless callee has
   // always-inline attribute).
-  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
+  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee,
+                                         TTIWP->getTTI(*Callee)))
     return llvm::InlineCost::getNever();
 
   // Don't inline this call if the caller has the optnone attribute.
@@ -1346,7 +1386,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
         << "...\n");
 
-  CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold);
+  CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS);
   bool ShouldInline = CA.analyzeCall(CS);
 
   DEBUG(CA.dump());
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index e76d26e..de2b9c0 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -64,7 +64,7 @@ namespace {
     void print(raw_ostream &O, const Module *M) const override {}
 
   };
-} // namespace
+}
 
 char InstCount::ID = 0;
 INITIALIZE_PASS(InstCount, "instcount",
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index f421d28..a6ae7f2 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -286,7 +286,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
               << Val.getConstantRange().getUpper() << '>';
   return OS << "constant<" << *Val.getConstant() << '>';
 }
-} // namespace llvm
+}
 
 //===----------------------------------------------------------------------===//
 //                          LazyValueInfoCache Decl
@@ -306,7 +306,7 @@ namespace {
       deleted();
     }
   };
-} // namespace
+}
 
 namespace { 
   /// This is the cache kept by LazyValueInfo which
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 6ea6ccb..0b9308a 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -157,7 +157,7 @@ namespace {
       WriteValues({V1, Vs...});
     }
   };
-} // namespace
+}
 
 char Lint::ID = 0;
 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -244,9 +244,8 @@ void Lint::visitCallSite(CallSite CS) {
         if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
           for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
             if (AI != BI && (*BI)->getType()->isPointerTy()) {
-              AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
-              Assert(Result != AliasAnalysis::MustAlias &&
-                         Result != AliasAnalysis::PartialAlias,
+              AliasResult Result = AA->alias(*AI, *BI);
+              Assert(Result != MustAlias && Result != PartialAlias,
                      "Unusual: noalias argument aliases another argument", &I);
             }
 
@@ -297,7 +296,7 @@ void Lint::visitCallSite(CallSite CS) {
         if (Len->getValue().isIntN(32))
           Size = Len->getValue().getZExtValue();
       Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
-                 AliasAnalysis::MustAlias,
+                 MustAlias,
              "Undefined behavior: memcpy source and destination overlap", &I);
       break;
     }
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index aed3b04..624c5a1 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -65,6 +65,12 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
 bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
                                        unsigned Align) {
   const DataLayout &DL = ScanFrom->getModule()->getDataLayout();
+
+  // Zero alignment means that the load has the ABI alignment for the target
+  if (Align == 0)
+    Align = DL.getABITypeAlignment(V->getType()->getPointerElementType());
+  assert(isPowerOf2_32(Align));
+
   int64_t ByteOffset = 0;
   Value *Base = V;
   Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
@@ -102,7 +108,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
     if (Align <= BaseAlign) {
       // Check if the load is within the bounds of the underlying object.
       if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
-          (Align == 0 || (ByteOffset % Align) == 0))
+          ((ByteOffset % Align) == 0))
         return true;
     }
   }
@@ -128,20 +134,28 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
       return false;
 
     Value *AccessedPtr;
-    if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+    unsigned AccessedAlign;
+    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
       AccessedPtr = LI->getPointerOperand();
-    else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+      AccessedAlign = LI->getAlignment();
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
       AccessedPtr = SI->getPointerOperand();
-    else
+      AccessedAlign = SI->getAlignment();
+    } else
+      continue;
+
+    Type *AccessedTy = AccessedPtr->getType()->getPointerElementType();
+    if (AccessedAlign == 0)
+      AccessedAlign = DL.getABITypeAlignment(AccessedTy);
+    if (AccessedAlign < Align)
       continue;
 
     // Handle trivial cases.
     if (AccessedPtr == V)
       return true;
 
-    auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
     if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
-        LoadSize <= DL.getTypeStoreSize(AccessedTy->getElementType()))
+        LoadSize <= DL.getTypeStoreSize(AccessedTy))
       return true;
   }
   return false;
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 8425b75..b11cd7e 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -22,7 +22,7 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "loop-accesses"
@@ -504,6 +504,54 @@ static bool isInBoundsGep(Value *Ptr) {
   return false;
 }
 
+/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
+/// i.e. monotonically increasing/decreasing.
+static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
+                           ScalarEvolution *SE, const Loop *L) {
+  // FIXME: This should probably only return true for NUW.
+  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
+    return true;
+
+  // Scalar evolution does not propagate the non-wrapping flags to values that
+  // are derived from a non-wrapping induction variable because non-wrapping
+  // could be flow-sensitive.
+  //
+  // Look through the potentially overflowing instruction to try to prove
+  // non-wrapping for the *specific* value of Ptr.
+
+  // The arithmetic implied by an inbounds GEP can't overflow.
+  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP || !GEP->isInBounds())
+    return false;
+
+  // Make sure there is only one non-const index and analyze that.
+  Value *NonConstIndex = nullptr;
+  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
+    if (!isa<ConstantInt>(*Index)) {
+      if (NonConstIndex)
+        return false;
+      NonConstIndex = *Index;
+    }
+  if (!NonConstIndex)
+    // The recurrence is on the pointer, ignore for now.
+    return false;
+
+  // The index in GEP is signed.  It is non-wrapping if it's derived from a NSW
+  // AddRec using a NSW operation.
+  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
+    if (OBO->hasNoSignedWrap() &&
+        // Assume constant for other the operand so that the AddRec can be
+        // easily found.
+        isa<ConstantInt>(OBO->getOperand(1))) {
+      auto *OpScev = SE->getSCEV(OBO->getOperand(0));
+
+      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
+        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
+    }
+
+  return false;
+}
+
 /// \brief Check whether the access through \p Ptr has a constant stride.
 int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
                        const ValueToValueMap &StridesMap) {
@@ -541,7 +589,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
   // to access the pointer value "0" which is undefined behavior in address
   // space 0, therefore we can also vectorize this case.
   bool IsInBoundsGEP = isInBoundsGep(Ptr);
-  bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
+  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
     DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 81b7ecd..e9fcf02 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -56,7 +56,7 @@ public:
 };
 
 char PrintLoopPass::ID = 0;
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 // LPPassManager
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 54a04d9..da3b829 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -74,7 +74,7 @@ namespace {
       return InstTypePair(inst, type);
     }
   };
-} // namespace
+}
 
 char MemDepPrinter::ID = 0;
 INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index b0194d3..fa292a2 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -37,7 +37,7 @@ namespace {
       Vec.clear();
     }
   };
-} // namespace
+}
 
 char MemDerefPrinter::ID = 0;
 INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index cf8ba5c..782a67b 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -486,10 +486,10 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
       MemoryLocation LoadLoc = MemoryLocation::get(LI);
 
       // If we found a pointer, check if it could be the same as our pointer.
-      AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
+      AliasResult R = AA->alias(LoadLoc, MemLoc);
 
       if (isLoad) {
-        if (R == AliasAnalysis::NoAlias) {
+        if (R == NoAlias) {
           // If this is an over-aligned integer load (for example,
           // "load i8* %P, align 4") see if it would obviously overlap with the
           // queried location if widened to a larger load (e.g. if the queried
@@ -506,7 +506,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
         }
 
         // Must aliased loads are defs of each other.
-        if (R == AliasAnalysis::MustAlias)
+        if (R == MustAlias)
           return MemDepResult::getDef(Inst);
 
 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
@@ -516,7 +516,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
 
         // If we have a partial alias, then return this as a clobber for the
         // client to handle.
-        if (R == AliasAnalysis::PartialAlias)
+        if (R == PartialAlias)
           return MemDepResult::getClobber(Inst);
 #endif
 
@@ -526,7 +526,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
       }
 
       // Stores don't depend on other no-aliased accesses.
-      if (R == AliasAnalysis::NoAlias)
+      if (R == NoAlias)
         continue;
 
       // Stores don't alias loads from read-only memory.
@@ -575,11 +575,11 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
       MemoryLocation StoreLoc = MemoryLocation::get(SI);
 
       // If we found a pointer, check if it could be the same as our pointer.
-      AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
+      AliasResult R = AA->alias(StoreLoc, MemLoc);
 
-      if (R == AliasAnalysis::NoAlias)
+      if (R == NoAlias)
         continue;
-      if (R == AliasAnalysis::MustAlias)
+      if (R == MustAlias)
         return MemDepResult::getDef(Inst);
       if (isInvariantLoad)
        continue;
@@ -603,7 +603,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(
       if (isInvariantLoad)
         continue;
       // Be conservative if the accessed pointer may alias the allocation.
-      if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
+      if (AA->alias(Inst, AccessPtr) != NoAlias)
         return MemDepResult::getClobber(Inst);
       // If the allocation is not aliased and does not read memory (like
       // strdup), it is safe to ignore.
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 45ae818..36c4714 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -40,7 +40,7 @@ namespace {
     }
     void print(raw_ostream &O, const Module *M) const override;
   };
-} // namespace
+}
 
 char ModuleDebugInfoPrinter::ID = 0;
 INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 2b09bec..d7f5109 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -194,7 +194,7 @@ struct RegionOnlyPrinter
     }
 };
 
-} // namespace
+}
 
 char RegionOnlyPrinter::ID = 0;
 INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 81e07e9..9c7c175 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -627,7 +627,7 @@ namespace {
       llvm_unreachable("Unknown SCEV kind!");
     }
   };
-} // namespace
+}
 
 /// GroupByComplexity - Given a list of SCEV objects, order them by their
 /// complexity, and group objects of the same complexity together by value.
@@ -689,7 +689,7 @@ struct FindSCEVSize {
     return false;
   }
 };
-} // namespace
+}
 
 // Returns the size of the SCEV S.
 static inline int sizeOfSCEV(const SCEV *S) {
@@ -937,7 +937,7 @@ private:
   const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
 };
 
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 //                      Simple SCEV method implementations
@@ -1248,7 +1248,7 @@ struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
 
 const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
     SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
-} // namespace
+}
 
 // The recurrence AR has been shown to have no signed/unsigned wrap or something
 // close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
@@ -3300,7 +3300,7 @@ namespace {
     }
     bool isDone() const { return FindOne; }
   };
-} // namespace
+}
 
 bool ScalarEvolution::checkValidity(const SCEV *S) const {
   FindInvalidSCEVUnknown F;
@@ -7594,7 +7594,7 @@ struct FindUndefs {
     return Found;
   }
 };
-} // namespace
+}
 
 // Return true when S contains at least an undef value.
 static inline bool
@@ -7644,14 +7644,14 @@ struct SCEVCollectTerms {
   }
   bool isDone() const { return false; }
 };
-} // namespace
+}
 
 /// Find parametric terms in this SCEVAddRecExpr.
-void SCEVAddRecExpr::collectParametricTerms(
-    ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) const {
+void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
+    SmallVectorImpl<const SCEV *> &Terms) {
   SmallVector<const SCEV *, 4> Strides;
-  SCEVCollectStrides StrideCollector(SE, Strides);
-  visitAll(this, StrideCollector);
+  SCEVCollectStrides StrideCollector(*this, Strides);
+  visitAll(Expr, StrideCollector);
 
   DEBUG({
       dbgs() << "Strides:\n";
@@ -7737,7 +7737,7 @@ struct FindParameter {
     return FoundParameter;
   }
 };
-} // namespace
+}
 
 // Returns true when S contains at least a SCEVUnknown parameter.
 static inline bool
@@ -7867,19 +7867,23 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
 
 /// Third step of delinearization: compute the access functions for the
 /// Subscripts based on the dimensions in Sizes.
-void SCEVAddRecExpr::computeAccessFunctions(
-    ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts,
-    SmallVectorImpl<const SCEV *> &Sizes) const {
+void ScalarEvolution::computeAccessFunctions(
+    const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
+    SmallVectorImpl<const SCEV *> &Sizes) {
 
   // Early exit in case this SCEV is not an affine multivariate function.
-  if (Sizes.empty() || !this->isAffine())
+  if (Sizes.empty())
     return;
 
-  const SCEV *Res = this;
+  if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr))
+    if (!AR->isAffine())
+      return;
+
+  const SCEV *Res = Expr;
   int Last = Sizes.size() - 1;
   for (int i = Last; i >= 0; i--) {
     const SCEV *Q, *R;
-    SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+    SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
 
     DEBUG({
         dbgs() << "Res: " << *Res << "\n";
@@ -7971,31 +7975,31 @@ void SCEVAddRecExpr::computeAccessFunctions(
 /// asking for the SCEV of the memory access with respect to all enclosing
 /// loops, calling SCEV->delinearize on that and printing the results.
 
-void SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+void ScalarEvolution::delinearize(const SCEV *Expr,
                                  SmallVectorImpl<const SCEV *> &Subscripts,
                                  SmallVectorImpl<const SCEV *> &Sizes,
-                                 const SCEV *ElementSize) const {
+                                 const SCEV *ElementSize) {
   // First step: collect parametric terms.
   SmallVector<const SCEV *, 4> Terms;
-  collectParametricTerms(SE, Terms);
+  collectParametricTerms(Expr, Terms);
 
   if (Terms.empty())
     return;
 
   // Second step: find subscript sizes.
-  SE.findArrayDimensions(Terms, Sizes, ElementSize);
+  findArrayDimensions(Terms, Sizes, ElementSize);
 
   if (Sizes.empty())
     return;
 
   // Third step: compute the access functions for each subscript.
-  computeAccessFunctions(SE, Subscripts, Sizes);
+  computeAccessFunctions(Expr, Subscripts, Sizes);
 
   if (Subscripts.empty())
     return;
 
   DEBUG({
-      dbgs() << "succeeded to delinearize " << *this << "\n";
+      dbgs() << "succeeded to delinearize " << *Expr << "\n";
       dbgs() << "ArrayDecl[UnknownSize]";
       for (const SCEV *S : Sizes)
         dbgs() << "[" << *S << "]";
@@ -8418,7 +8422,7 @@ struct SCEVSearch {
   }
   bool isDone() const { return IsFound; }
 };
-} // namespace
+}
 
 bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
   SCEVSearch Search(Op);
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 2d45c59..6bc0d85 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -107,9 +107,8 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
   return nullptr;
 }
 
-AliasAnalysis::AliasResult
-ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
-                                    const MemoryLocation &LocB) {
+AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
+                                                const MemoryLocation &LocB) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are. This allows the code below to ignore this special
   // case.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 0264ad1..fee2a2d 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -24,10 +24,12 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
+using namespace PatternMatch;
 
 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
 /// reusing an existing cast if a suitable one exists, moving an existing
@@ -661,7 +663,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   Type *Ty = SE.getEffectiveSCEVType(S->getType());
@@ -751,25 +753,30 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
   // out of loops.
   Value *Prod = nullptr;
   for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
-       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) {
     const SCEV *Op = I->second;
     if (!Prod) {
       // This is the first operand. Just expand it.
       Prod = expand(Op);
-      ++I;
     } else if (Op->isAllOnesValue()) {
       // Instead of doing a multiply by negative one, just do a negate.
       Prod = InsertNoopCastOfTo(Prod, Ty);
       Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
-      ++I;
     } else {
       // A simple mul.
       Value *W = expandCodeFor(Op, Ty);
       Prod = InsertNoopCastOfTo(Prod, Ty);
       // Canonicalize a constant to the RHS.
       if (isa<Constant>(Prod)) std::swap(Prod, W);
-      Prod = InsertBinop(Instruction::Mul, Prod, W);
-      ++I;
+      const APInt *RHS;
+      if (match(W, m_Power2(RHS))) {
+        // Canonicalize Prod*(1<<C) to Prod<<C.
+        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+        Prod = InsertBinop(Instruction::Shl, Prod,
+                           ConstantInt::get(Ty, RHS->logBase2()));
+      } else {
+        Prod = InsertBinop(Instruction::Mul, Prod, W);
+      }
     }
   }
 
@@ -1933,7 +1940,7 @@ struct SCEVFindUnsafe {
   }
   bool isDone() const { return IsUnsafe; }
 };
-} // namespace
+}
 
 namespace llvm {
 bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index a8cfeb6..a5fca3e 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -177,8 +177,8 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
   return true;
 }
 
-AliasAnalysis::AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA,
-                                                  const MemoryLocation &LocB) {
+AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA,
+                                   const MemoryLocation &LocB) {
   if (!EnableScopedNoAlias)
     return AliasAnalysis::alias(LocA, LocB);
 
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
index 878ca3d..fd3fbc0 100644
--- a/lib/Analysis/StratifiedSets.h
+++ b/lib/Analysis/StratifiedSets.h
@@ -688,5 +688,5 @@ private:
 
   bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
 };
-} // namespace llvm
+}
 #endif // LLVM_ADT_STRATIFIEDSETS_H
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 24cada3..520d1e5 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -284,6 +284,11 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
   return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
 }
 
+bool TargetTransformInfo::hasCompatibleFunctionAttributes(
+    const Function *Caller, const Function *Callee) const {
+  return TTIImpl->hasCompatibleFunctionAttributes(Caller, Callee);
+}
+
 TargetTransformInfo::Concept::~Concept() {}
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 82d29e0..4e9c6f6 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -270,7 +270,7 @@ namespace {
       return TBAAStructTypeNode(P);
     }
   };
-} // namespace
+}
 
 namespace {
   /// TypeBasedAliasAnalysis - This is a simple alias analysis
@@ -454,9 +454,8 @@ TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
   return false;
 }
 
-AliasAnalysis::AliasResult
-TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA,
-                              const MemoryLocation &LocB) {
+AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA,
+                                          const MemoryLocation &LocB) {
   if (!EnableTBAA)
     return AliasAnalysis::alias(LocA, LocB);
 
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
new file mode 100644
index 0000000..96fddd1
--- /dev/null
+++ b/lib/Analysis/VectorUtils.cpp
@@ -0,0 +1,213 @@
+//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines vectorizer utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/VectorUtils.h"
+
+/// \brief Identify if the intrinsic is trivially vectorizable.
+/// This method returns true if the intrinsic's argument types are all
+/// scalars for the scalar form of the intrinsic and all vectors for
+/// the vector form of the intrinsic.
+bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
+  switch (ID) {
+  case Intrinsic::sqrt:
+  case Intrinsic::sin:
+  case Intrinsic::cos:
+  case Intrinsic::exp:
+  case Intrinsic::exp2:
+  case Intrinsic::log:
+  case Intrinsic::log10:
+  case Intrinsic::log2:
+  case Intrinsic::fabs:
+  case Intrinsic::minnum:
+  case Intrinsic::maxnum:
+  case Intrinsic::copysign:
+  case Intrinsic::floor:
+  case Intrinsic::ceil:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
+  case Intrinsic::round:
+  case Intrinsic::bswap:
+  case Intrinsic::ctpop:
+  case Intrinsic::pow:
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd:
+  case Intrinsic::ctlz:
+  case Intrinsic::cttz:
+  case Intrinsic::powi:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// \brief Identifies if the intrinsic has a scalar operand. It check for
+/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
+                                        unsigned ScalarOpdIdx) {
+  switch (ID) {
+  case Intrinsic::ctlz:
+  case Intrinsic::cttz:
+  case Intrinsic::powi:
+    return (ScalarOpdIdx == 1);
+  default:
+    return false;
+  }
+}
+
+/// \brief Check call has a unary float signature
+/// It checks following:
+/// a) call should have a single argument
+/// b) argument type should be floating point type
+/// c) call instruction type and argument type should be same
+/// d) call should only reads memory.
+/// If all these condition is met then return ValidIntrinsicID
+/// else return not_intrinsic.
+llvm::Intrinsic::ID
+llvm::checkUnaryFloatSignature(const CallInst &I,
+                               Intrinsic::ID ValidIntrinsicID) {
+  if (I.getNumArgOperands() != 1 ||
+      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+      I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory())
+    return Intrinsic::not_intrinsic;
+
+  return ValidIntrinsicID;
+}
+
+/// \brief Check call has a binary float signature
+/// It checks following:
+/// a) call should have 2 arguments.
+/// b) arguments type should be floating point type
+/// c) call instruction type and arguments type should be same
+/// d) call should only reads memory.
+/// If all these condition is met then return ValidIntrinsicID
+/// else return not_intrinsic.
+llvm::Intrinsic::ID
+llvm::checkBinaryFloatSignature(const CallInst &I,
+                                Intrinsic::ID ValidIntrinsicID) {
+  if (I.getNumArgOperands() != 2 ||
+      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+      !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
+      I.getType() != I.getArgOperand(0)->getType() ||
+      I.getType() != I.getArgOperand(1)->getType() || !I.onlyReadsMemory())
+    return Intrinsic::not_intrinsic;
+
+  return ValidIntrinsicID;
+}
+
+/// \brief Returns intrinsic ID for call.
+/// For the input call instruction it finds mapping intrinsic and returns
+/// its ID, in case it does not found it return not_intrinsic.
+llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
+                                                const TargetLibraryInfo *TLI) {
+  // If we have an intrinsic call, check if it is trivially vectorizable.
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+    Intrinsic::ID ID = II->getIntrinsicID();
+    if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
+        ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
+      return ID;
+    return Intrinsic::not_intrinsic;
+  }
+
+  if (!TLI)
+    return Intrinsic::not_intrinsic;
+
+  LibFunc::Func Func;
+  Function *F = CI->getCalledFunction();
+  // We're going to make assumptions on the semantics of the functions, check
+  // that the target knows that it's available in this environment and it does
+  // not have local linkage.
+  if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
+    return Intrinsic::not_intrinsic;
+
+  // Otherwise check if we have a call to a function that can be turned into a
+  // vector intrinsic.
+  switch (Func) {
+  default:
+    break;
+  case LibFunc::sin:
+  case LibFunc::sinf:
+  case LibFunc::sinl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::sin);
+  case LibFunc::cos:
+  case LibFunc::cosf:
+  case LibFunc::cosl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::cos);
+  case LibFunc::exp:
+  case LibFunc::expf:
+  case LibFunc::expl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::exp);
+  case LibFunc::exp2:
+  case LibFunc::exp2f:
+  case LibFunc::exp2l:
+    return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
+  case LibFunc::log:
+  case LibFunc::logf:
+  case LibFunc::logl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::log);
+  case LibFunc::log10:
+  case LibFunc::log10f:
+  case LibFunc::log10l:
+    return checkUnaryFloatSignature(*CI, Intrinsic::log10);
+  case LibFunc::log2:
+  case LibFunc::log2f:
+  case LibFunc::log2l:
+    return checkUnaryFloatSignature(*CI, Intrinsic::log2);
+  case LibFunc::fabs:
+  case LibFunc::fabsf:
+  case LibFunc::fabsl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
+  case LibFunc::fmin:
+  case LibFunc::fminf:
+  case LibFunc::fminl:
+    return checkBinaryFloatSignature(*CI, Intrinsic::minnum);
+  case LibFunc::fmax:
+  case LibFunc::fmaxf:
+  case LibFunc::fmaxl:
+    return checkBinaryFloatSignature(*CI, Intrinsic::maxnum);
+  case LibFunc::copysign:
+  case LibFunc::copysignf:
+  case LibFunc::copysignl:
+    return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
+  case LibFunc::floor:
+  case LibFunc::floorf:
+  case LibFunc::floorl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::floor);
+  case LibFunc::ceil:
+  case LibFunc::ceilf:
+  case LibFunc::ceill:
+    return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
+  case LibFunc::trunc:
+  case LibFunc::truncf:
+  case LibFunc::truncl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
+  case LibFunc::rint:
+  case LibFunc::rintf:
+  case LibFunc::rintl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::rint);
+  case LibFunc::nearbyint:
+  case LibFunc::nearbyintf:
+  case LibFunc::nearbyintl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
+  case LibFunc::round:
+  case LibFunc::roundf:
+  case LibFunc::roundl:
+    return checkUnaryFloatSignature(*CI, Intrinsic::round);
+  case LibFunc::pow:
+  case LibFunc::powf:
+  case LibFunc::powl:
+    return checkBinaryFloatSignature(*CI, Intrinsic::pow);
+  }
+
+  return Intrinsic::not_intrinsic;
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 0bdc350..88f359d 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -903,20 +903,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
   if (CurPtr[0] != '.') {
     if (TokStart[0] == '0' && TokStart[1] == 'x')
       return Lex0x();
-    unsigned Len = CurPtr-TokStart;
-    uint32_t numBits = ((Len * 64) / 19) + 2;
-    APInt Tmp(numBits, StringRef(TokStart, Len), 10);
-    if (TokStart[0] == '-') {
-      uint32_t minBits = Tmp.getMinSignedBits();
-      if (minBits > 0 && minBits < numBits)
-        Tmp = Tmp.trunc(minBits);
-      APSIntVal = APSInt(Tmp, false);
-    } else {
-      uint32_t activeBits = Tmp.getActiveBits();
-      if (activeBits > 0 && activeBits < numBits)
-        Tmp = Tmp.trunc(activeBits);
-      APSIntVal = APSInt(Tmp, true);
-    }
+    APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
     return lltok::APSInt;
   }
 
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index a121e59..b3c7fa0 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -13,6 +13,7 @@
 
 #include "LLParser.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/AsmParser/SlotMapping.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
@@ -161,6 +162,14 @@ bool LLParser::ValidateEndOfModule() {
 
   UpgradeDebugInfo(*M);
 
+  if (!Slots)
+    return false;
+  // Initialize the slot mapping.
+  // Because by this point we've parsed and validated everything, we can "steal"
+  // the mapping from LLParser as it doesn't need it anymore.
+  Slots->GlobalValues = std::move(NumberedVals);
+  Slots->MetadataNodes = std::move(NumberedMetadata);
+
   return false;
 }
 
@@ -3667,6 +3676,24 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
   return false;
 }
 
+/// ParseDIModule:
+///   ::= !DIModule(scope: !0, name: "SomeModule", configMacros: "-DNDEBUG",
+///                 includePath: "/usr/include", isysroot: "/")
+bool LLParser::ParseDIModule(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
+  REQUIRED(scope, MDField, );                                                  \
+  REQUIRED(name, MDStringField, );                                             \
+  OPTIONAL(configMacros, MDStringField, );                                     \
+  OPTIONAL(includePath, MDStringField, );                                      \
+  OPTIONAL(isysroot, MDStringField, );
+  PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+  Result = GET_OR_DISTINCT(DIModule, (Context, scope.Val, name.Val,
+                           configMacros.Val, includePath.Val, isysroot.Val));
+  return false;
+}
+
 /// ParseDITemplateTypeParameter:
 ///   ::= !DITemplateTypeParameter(name: "Ty", type: !1)
 bool LLParser::ParseDITemplateTypeParameter(MDNode *&Result, bool IsDistinct) {
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 9f554c0..6e57b3e 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -37,6 +37,7 @@ namespace llvm {
   class Comdat;
   class MDString;
   class MDNode;
+  struct SlotMapping;
   class StructType;
 
   /// ValID - Represents a reference of a definition of some sort with no type.
@@ -87,6 +88,7 @@ namespace llvm {
     LLVMContext &Context;
     LLLexer Lex;
     Module *M;
+    SlotMapping *Slots;
 
     // Instruction metadata resolution.  Each instruction can have a list of
     // MDRef info associated with them.
@@ -135,9 +137,10 @@ namespace llvm {
     std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
 
   public:
-    LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *m)
-        : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m),
-          BlockAddressPFS(nullptr) {}
+    LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M,
+             SlotMapping *Slots = nullptr)
+        : Context(M->getContext()), Lex(F, SM, Err, M->getContext()), M(M),
+          Slots(Slots), BlockAddressPFS(nullptr) {}
     bool Run();
 
     LLVMContext &getContext() { return Context; }
@@ -469,6 +472,6 @@ namespace llvm {
     bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
     bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
   };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index c55a6a1..9145a54 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -22,21 +22,23 @@
 #include <system_error>
 using namespace llvm;
 
-bool llvm::parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err) {
+bool llvm::parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err,
+                             SlotMapping *Slots) {
   SourceMgr SM;
   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F);
   SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
 
-  return LLParser(F.getBuffer(), SM, Err, &M).Run();
+  return LLParser(F.getBuffer(), SM, Err, &M, Slots).Run();
 }
 
 std::unique_ptr<Module> llvm::parseAssembly(MemoryBufferRef F,
                                             SMDiagnostic &Err,
-                                            LLVMContext &Context) {
+                                            LLVMContext &Context,
+                                            SlotMapping *Slots) {
   std::unique_ptr<Module> M =
       make_unique<Module>(F.getBufferIdentifier(), Context);
 
-  if (parseAssemblyInto(F, *M, Err))
+  if (parseAssemblyInto(F, *M, Err, Slots))
     return nullptr;
 
   return M;
@@ -44,7 +46,8 @@ std::unique_ptr<Module> llvm::parseAssembly(MemoryBufferRef F,
 
 std::unique_ptr<Module> llvm::parseAssemblyFile(StringRef Filename,
                                                 SMDiagnostic &Err,
-                                                LLVMContext &Context) {
+                                                LLVMContext &Context,
+                                                SlotMapping *Slots) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
       MemoryBuffer::getFileOrSTDIN(Filename);
   if (std::error_code EC = FileOrErr.getError()) {
@@ -53,12 +56,13 @@ std::unique_ptr<Module> llvm::parseAssemblyFile(StringRef Filename,
     return nullptr;
   }
 
-  return parseAssembly(FileOrErr.get()->getMemBufferRef(), Err, Context);
+  return parseAssembly(FileOrErr.get()->getMemBufferRef(), Err, Context, Slots);
 }
 
 std::unique_ptr<Module> llvm::parseAssemblyString(StringRef AsmString,
                                                   SMDiagnostic &Err,
-                                                  LLVMContext &Context) {
+                                                  LLVMContext &Context,
+                                                  SlotMapping *Slots) {
   MemoryBufferRef F(AsmString, "<string>");
-  return parseAssembly(F, Err, Context);
+  return parseAssembly(F, Err, Context, Slots);
 }
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 0cadd6c..09f0b68 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -136,7 +136,6 @@ class BitcodeReader : public GVMaterializer {
   std::unique_ptr<MemoryBuffer> Buffer;
   std::unique_ptr<BitstreamReader> StreamFile;
   BitstreamCursor Stream;
-  bool IsStreamed;
   uint64_t NextUnreadBit = 0;
   bool SeenValueSymbolTable = false;
 
@@ -171,7 +170,7 @@ class BitcodeReader : public GVMaterializer {
 
   // When intrinsic functions are encountered which require upgrading they are
   // stored here with their replacement function.
-  typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+  typedef DenseMap<Function*, Function*> UpgradedIntrinsicMap;
   UpgradedIntrinsicMap UpgradedIntrinsics;
 
   // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
@@ -428,15 +427,13 @@ BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context,
                              DiagnosticHandlerFunction DiagnosticHandler)
     : Context(Context),
       DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)),
-      Buffer(Buffer), IsStreamed(false), ValueList(Context),
-      MDValueList(Context) {}
+      Buffer(Buffer), ValueList(Context), MDValueList(Context) {}
 
 BitcodeReader::BitcodeReader(LLVMContext &Context,
                              DiagnosticHandlerFunction DiagnosticHandler)
     : Context(Context),
       DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)),
-      Buffer(nullptr), IsStreamed(true), ValueList(Context),
-      MDValueList(Context) {}
+      Buffer(nullptr), ValueList(Context), MDValueList(Context) {}
 
 std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
   if (WillMaterializeAllForwardRefs)
@@ -731,7 +728,7 @@ public:
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
 };
-} // namespace
+}
 
 // FIXME: can we inherit this from ConstantExpr?
 template <>
@@ -739,7 +736,7 @@ struct OperandTraits<ConstantPlaceHolder> :
   public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
 };
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
-} // namespace llvm
+}
 
 void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
   if (Idx == size()) {
@@ -1830,6 +1827,20 @@ std::error_code BitcodeReader::parseMetadata() {
           NextMDValueNo++);
       break;
     }
+
+    case bitc::METADATA_MODULE: {
+      if (Record.size() != 6)
+        return error("Invalid record");
+
+      MDValueList.assignValue(
+          GET_OR_DISTINCT(DIModule, Record[0],
+                          (Context, getMDOrNull(Record[1]),
+                          getMDString(Record[2]), getMDString(Record[3]),
+                          getMDString(Record[4]), getMDString(Record[5]))),
+          NextMDValueNo++);
+      break;
+    }
+
     case bitc::METADATA_FILE: {
       if (Record.size() != 3)
         return error("Invalid record");
@@ -2699,7 +2710,7 @@ std::error_code BitcodeReader::globalCleanup() {
   for (Function &F : *TheModule) {
     Function *NewFn;
     if (UpgradeIntrinsicFunction(&F, NewFn))
-      UpgradedIntrinsics.push_back(std::make_pair(&F, NewFn));
+      UpgradedIntrinsics[&F] = NewFn;
   }
 
   // Look for global variables which need to be renamed.
@@ -2789,13 +2800,11 @@ std::error_code BitcodeReader::parseModule(bool Resume,
 
         if (std::error_code EC = rememberAndSkipFunctionBody())
           return EC;
-        // For streaming bitcode, suspend parsing when we reach the function
-        // bodies. Subsequent materialization calls will resume it when
-        // necessary. For streaming, the function bodies must be at the end of
-        // the bitcode. If the bitcode file is old, the symbol table will be
-        // at the end instead and will not have been seen yet. In this case,
-        // just finish the parse now.
-        if (IsStreamed && SeenValueSymbolTable) {
+        // Suspend parsing when we reach the function bodies. Subsequent
+        // materialization calls will resume it when necessary. If the bitcode
+        // file is old, the symbol table will be at the end instead and will not
+        // have been seen yet. In this case, just finish the parse now.
+        if (SeenValueSymbolTable) {
           NextUnreadBit = Stream.GetCurrentBitNo();
           return std::error_code();
         }
@@ -3049,8 +3058,7 @@ std::error_code BitcodeReader::parseModule(bool Resume,
       if (!isProto) {
         Func->setIsMaterializable(true);
         FunctionsWithBodies.push_back(Func);
-        if (IsStreamed)
-          DeferredFunctionInfo[Func] = 0;
+        DeferredFunctionInfo[Func] = 0;
       }
       break;
     }
@@ -4434,7 +4442,7 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
   assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
   // If its position is recorded as 0, its body is somewhere in the stream
   // but we haven't seen it yet.
-  if (DFII->second == 0 && IsStreamed)
+  if (DFII->second == 0)
     if (std::error_code EC = findFunctionInStream(F, DFII))
       return EC;
 
@@ -4449,13 +4457,14 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
     stripDebugInfo(*F);
 
   // Upgrade any old intrinsic calls in the function.
-  for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(),
-       E = UpgradedIntrinsics.end(); I != E; ++I) {
-    if (I->first != I->second) {
-      for (auto UI = I->first->user_begin(), UE = I->first->user_end();
+  for (auto &I : UpgradedIntrinsics) {
+    if (I.first != I.second) {
+      for (auto UI = I.first->user_begin(), UE = I.first->user_end();
            UI != UE;) {
-        if (CallInst* CI = dyn_cast<CallInst>(*UI++))
-          UpgradeIntrinsicCall(CI, I->second);
+        User *U = *UI;
+        ++UI;
+        if (CallInst *CI = dyn_cast<CallInst>(U))
+          UpgradeIntrinsicCall(CI, I.second);
       }
     }
   }
@@ -4523,20 +4532,18 @@ std::error_code BitcodeReader::materializeModule(Module *M) {
   // delete the old functions to clean up. We can't do this unless the entire
   // module is materialized because there could always be another function body
   // with calls to the old function.
-  for (std::vector<std::pair<Function*, Function*> >::iterator I =
-       UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) {
-    if (I->first != I->second) {
-      for (auto UI = I->first->user_begin(), UE = I->first->user_end();
-           UI != UE;) {
-        if (CallInst* CI = dyn_cast<CallInst>(*UI++))
-          UpgradeIntrinsicCall(CI, I->second);
+  for (auto &I : UpgradedIntrinsics) {
+    if (I.first != I.second) {
+      for (auto *U : I.first->users()) {
+        if (CallInst *CI = dyn_cast<CallInst>(U))
+          UpgradeIntrinsicCall(CI, I.second);
       }
-      if (!I->first->use_empty())
-        I->first->replaceAllUsesWith(I->second);
-      I->first->eraseFromParent();
+      if (!I.first->use_empty())
+        I.first->replaceAllUsesWith(I.second);
+      I.first->eraseFromParent();
     }
   }
-  std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
+  UpgradedIntrinsics.clear();
 
   for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++)
     UpgradeInstWithTBAATag(InstsWithTBAATag[I]);
@@ -4618,7 +4625,7 @@ class BitcodeErrorCategoryType : public std::error_category {
     llvm_unreachable("Unknown error type!");
   }
 };
-} // namespace
+}
 
 static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory;
 
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index e79eeb0..622f7ea 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1032,6 +1032,17 @@ static void WriteDINamespace(const DINamespace *N, const ValueEnumerator &VE,
   Record.clear();
 }
 
+static void WriteDIModule(const DIModule *N, const ValueEnumerator &VE,
+                          BitstreamWriter &Stream,
+                          SmallVectorImpl<uint64_t> &Record, unsigned Abbrev) {
+  Record.push_back(N->isDistinct());
+  for (auto &I : N->operands())
+    Record.push_back(VE.getMetadataOrNullID(I));
+
+  Stream.EmitRecord(bitc::METADATA_MODULE, Record, Abbrev);
+  Record.clear();
+}
+
 static void WriteDITemplateTypeParameter(const DITemplateTypeParameter *N,
                                          const ValueEnumerator &VE,
                                          BitstreamWriter &Stream,
@@ -1519,8 +1530,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
     } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
                isa<ConstantVector>(C)) {
       Code = bitc::CST_CODE_AGGREGATE;
-      for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
-        Record.push_back(VE.getValueID(C->getOperand(i)));
+      for (const Value *Op : C->operands())
+        Record.push_back(VE.getValueID(Op));
       AbbrevToUse = AggregateAbbrev;
     } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
       switch (CE->getOpcode()) {
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index c890380..3165743 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -41,7 +41,7 @@ namespace {
       return false;
     }
   };
-} // namespace
+}
 
 char WriteBitcodePass::ID = 0;
 
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 53c3a40..44dd604 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -52,7 +52,7 @@ struct OrderMap {
     IDs[V].first = ID;
   }
 };
-} // namespace
+}
 
 static void orderValue(const Value *V, OrderMap &OM) {
   if (OM.lookup(V).first)
@@ -691,9 +691,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
 
   // This constant may have operands, make sure to enumerate the types in
   // them.
-  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-    const Value *Op = C->getOperand(i);
-
+  for (const Value *Op : C->operands()) {
     // Don't enumerate basic blocks here, this happens as operands to
     // blockaddress.
     if (isa<BasicBlock>(Op))
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index b2daa48..92d166e 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -203,6 +203,6 @@ private:
   void EnumerateNamedMetadata(const Module &M);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 63d2085..eba7383 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -33,7 +33,7 @@ namespace llvm {
 class RegisterClassInfo;
 
   /// Contains all the state necessary for anti-dep breaking.
-  class AggressiveAntiDepState {
+class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
   public:
     /// Information about a register reference within a liverange
     typedef struct {
@@ -108,8 +108,8 @@ class RegisterClassInfo;
     bool IsLive(unsigned Reg);
   };
 
-
-  class AggressiveAntiDepBreaker : public AntiDepBreaker {
+  class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker
+      : public AntiDepBreaker {
     MachineFunction& MF;
     MachineRegisterInfo &MRI;
     const TargetInstrInfo *TII;
@@ -174,6 +174,6 @@ class RegisterClassInfo;
                                    RenameOrderType& RenameOrder,
                                    std::map<unsigned, unsigned> &RenameMap);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 1e4eaa7..02b2d92 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -25,7 +25,7 @@ namespace llvm {
 class RegisterClassInfo;
 class VirtRegMap;
 
-class AllocationOrder {
+class LLVM_LIBRARY_VISIBILITY AllocationOrder {
   SmallVector<MCPhysReg, 16> Hints;
   ArrayRef<MCPhysReg> Order;
   int Pos;
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 7985241..9f05200 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -27,7 +27,7 @@ namespace llvm {
 
 /// This class works in conjunction with the post-RA scheduler to rename
 /// registers to break register anti-dependencies (WAR hazards).
-class AntiDepBreaker {
+class LLVM_LIBRARY_VISIBILITY AntiDepBreaker {
 public:
   typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > 
     DbgValueVector;
@@ -62,6 +62,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index e0ce3f9..211fc98 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -48,5 +48,5 @@ public:
 
   void resetUsedFlag() { HasBeenUsed = false; }
 };
-} // namespace llvm
+}
 #endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 95da588..8a7e9f9 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -179,7 +179,7 @@ bool AsmPrinter::doInitialization(Module &M) {
 
   OutStreamer->InitSections(false);
 
-  Mang = new Mangler(TM.getDataLayout());
+  Mang = new Mangler();
 
   // Emit the version-min deplyment target directive if needed.
   //
@@ -2086,8 +2086,12 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   MCValue MV;
   if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
     return;
+  const MCSymbolRefExpr *SymA = MV.getSymA();
+  if (!SymA)
+    return;
 
-  const MCSymbol *GOTEquivSym = &MV.getSymA()->getSymbol();
+  // Check that GOT equivalent symbol is cached.
+  const MCSymbol *GOTEquivSym = &SymA->getSymbol();
   if (!AP.GlobalGOTEquivs.count(GOTEquivSym))
     return;
 
@@ -2095,8 +2099,11 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
   if (!BaseGV)
     return;
 
+  // Check for a valid base symbol
   const MCSymbol *BaseSym = AP.getSymbol(BaseGV);
-  if (BaseSym != &MV.getSymB()->getSymbol())
+  const MCSymbolRefExpr *SymB = MV.getSymB();
+
+  if (!SymB || BaseSym != &SymB->getSymbol())
     return;
 
   // Make sure to match:
@@ -2292,11 +2299,10 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
                                                            TM);
 }
 
-/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
-/// ExternalSymbol.
+/// Return the MCSymbol for the specified ExternalSymbol.
 MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
   SmallString<60> NameStr;
-  Mang->getNameWithPrefix(NameStr, Sym);
+  Mangler::getNameWithPrefix(NameStr, Sym, *TM.getDataLayout());
   return OutContext.getOrCreateSymbol(NameStr);
 }
 
@@ -2384,8 +2390,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
     if (isVerbose())
       OutStreamer->AddComment("Block address taken");
 
-    std::vector<MCSymbol*> Symbols = MMI->getAddrLabelSymbolToEmit(BB);
-    for (auto *Sym : Symbols)
+    for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
       OutStreamer->EmitLabel(Sym);
   }
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 8ee613b..ad180b6 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -261,8 +261,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
   // Emit the DIE attribute values.
   for (const auto &V : Die.values()) {
     dwarf::Attribute Attr = V.getAttribute();
-    dwarf::Form Form = V.getForm();
-    assert(Form && "Too many attributes for DIE (check abbreviation)");
+    assert(V.getForm() && "Too many attributes for DIE (check abbreviation)");
 
     if (isVerbose()) {
       OutStreamer->AddComment(dwarf::AttributeString(Attr));
@@ -272,13 +271,13 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
     }
 
     // Emit an attribute using the defined form.
-    V.EmitValue(this, Form);
+    V.EmitValue(this);
   }
 
   // Emit the DIE children if any.
   if (Die.hasChildren()) {
     for (auto &Child : Die.children())
-      emitDwarfDIE(*Child);
+      emitDwarfDIE(Child);
 
     OutStreamer->AddComment("End Of Children Mark");
     EmitInt8(0);
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 7a712a0..0cc829f 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -103,6 +103,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index f2da855..ba2f61a 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMAsmPrinter
   AsmPrinterDwarf.cpp
   AsmPrinterInlineAsm.cpp
   DbgValueHistoryCalculator.cpp
+  DebugLocStream.cpp
   DIE.cpp
   DIEHash.cpp
   DwarfAccelTable.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4847de4..46dbc76 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -165,25 +165,23 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
   }
 
   IndentCount += 2;
-  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+  unsigned I = 0;
+  for (const auto &V : Values) {
     O << Indent;
 
     if (!isBlock)
-      O << dwarf::AttributeString(Values[i].getAttribute());
+      O << dwarf::AttributeString(V.getAttribute());
     else
-      O << "Blk[" << i << "]";
+      O << "Blk[" << I++ << "]";
 
-    O <<  "  "
-      << dwarf::FormEncodingString(Values[i].getForm())
-      << " ";
-    Values[i].print(O);
+    O << "  " << dwarf::FormEncodingString(V.getForm()) << " ";
+    V.print(O);
     O << "\n";
   }
   IndentCount -= 2;
 
-  for (unsigned j = 0, M = Children.size(); j < M; ++j) {
-    Children[j]->print(O, IndentCount+4);
-  }
+  for (const auto &Child : children())
+    Child.print(O, IndentCount + 4);
 
   if (!isBlock) O << "\n";
 }
@@ -193,7 +191,7 @@ void DIE::dump() {
 }
 #endif
 
-void DIEValue::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+void DIEValue::EmitValue(const AsmPrinter *AP) const {
   switch (Ty) {
   case isNone:
     llvm_unreachable("Expected valid DIEValue");
@@ -205,7 +203,7 @@ void DIEValue::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   }
 }
 
-unsigned DIEValue::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEValue::SizeOf(const AsmPrinter *AP) const {
   switch (Ty) {
   case isNone:
     llvm_unreachable("Expected valid DIEValue");
@@ -507,8 +505,8 @@ void DIETypeSignature::print(raw_ostream &O) const {
 ///
 unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
-    for (unsigned i = 0, N = Values.size(); i < N; ++i)
-      Size += Values[i].SizeOf(AP, Values[i].getForm());
+    for (const auto &V : Values)
+      Size += V.SizeOf(AP);
   }
 
   return Size;
@@ -527,8 +525,8 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
     Asm->EmitULEB128(Size); break;
   }
 
-  for (unsigned i = 0, N = Values.size(); i < N; ++i)
-    Values[i].EmitValue(Asm, Values[i].getForm());
+  for (const auto &V : Values)
+    V.EmitValue(Asm);
 }
 
 /// SizeOf - Determine size of location data in bytes.
@@ -560,8 +558,8 @@ void DIELoc::print(raw_ostream &O) const {
 ///
 unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
-    for (unsigned i = 0, N = Values.size(); i < N; ++i)
-      Size += Values[i].SizeOf(AP, Values[i].getForm());
+    for (const auto &V : Values)
+      Size += V.SizeOf(AP);
   }
 
   return Size;
@@ -578,8 +576,8 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_block:  Asm->EmitULEB128(Size); break;
   }
 
-  for (unsigned i = 0, N = Values.size(); i < N; ++i)
-    Values[i].EmitValue(Asm, Values[i].getForm());
+  for (const auto &V : Values)
+    V.EmitValue(Asm);
 }
 
 /// SizeOf - Determine size of block data in bytes.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 1445254..5e60156 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -263,7 +263,7 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
 
 // Hash all of the values in a block like set of values. This assumes that
 // all of the data is going to be added as integers.
-void DIEHash::hashBlockData(const DIE::value_range &Values) {
+void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
   for (const auto &V : Values)
     Hash.update((uint64_t)V.getDIEInteger().getValue());
 }
@@ -454,15 +454,15 @@ void DIEHash::computeHash(const DIE &Die) {
   for (auto &C : Die.children()) {
     // 7.27 Step 7
     // If C is a nested type entry or a member function entry, ...
-    if (isType(C->getTag()) || C->getTag() == dwarf::DW_TAG_subprogram) {
-      StringRef Name = getDIEStringAttr(*C, dwarf::DW_AT_name);
+    if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) {
+      StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
       // ... and has a DW_AT_name attribute
       if (!Name.empty()) {
-        hashNestedType(*C, Name);
+        hashNestedType(C, Name);
         continue;
       }
     }
-    computeHash(*C);
+    computeHash(C);
   }
 
   // Following the last (or if there are no children), append a zero byte.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 789e6dd..833ca02 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -128,7 +128,7 @@ private:
 
   /// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or
   /// DW_FORM_exprloc.
-  void hashBlockData(const DIE::value_range &Values);
+  void hashBlockData(const DIE::const_value_range &Values);
 
   /// \brief Hashes the contents pointed to in the .debug_loc section.
   void hashLocList(const DIELocList &LocList);
@@ -157,6 +157,6 @@ private:
   AsmPrinter *AP;
   DenseMap<const DIE *, unsigned> Numbering;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index 5d40050..546d1b4 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -55,6 +55,6 @@ public:
 void calculateDbgValueHistory(const MachineFunction *MF,
                               const TargetRegisterInfo *TRI,
                               DbgValueHistoryMap &Result);
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 083228b..afffa83 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -142,7 +142,7 @@ public:
   }
 
   /// \brief Lower this entry into a DWARF expression.
-  void finalize(const AsmPrinter &AP, DebugLocStream &Locs,
+  void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List,
                 const DIBasicType *BT);
 };
 
@@ -175,6 +175,6 @@ inline bool operator<(const DebugLocEntry::Value &A,
          B.getExpression()->getBitPieceOffset();
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
new file mode 100644
index 0000000..7e8ed71
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -0,0 +1,46 @@
+//===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DebugLocStream.h"
+#include "DwarfDebug.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+using namespace llvm;
+
+bool DebugLocStream::finalizeList(AsmPrinter &Asm) {
+  if (Lists.back().EntryOffset == Entries.size()) {
+    // Empty list.  Delete it.
+    Lists.pop_back();
+    return false;
+  }
+
+  // Real list.  Generate a label for it.
+  Lists.back().Label = Asm.createTempSymbol("debug_loc");
+  return true;
+}
+
+void DebugLocStream::finalizeEntry() {
+  if (Entries.back().ByteOffset != DWARFBytes.size())
+    return;
+
+  // The last entry was empty.  Delete it.
+  Comments.erase(Comments.begin() + Entries.back().CommentOffset,
+                 Comments.end());
+  Entries.pop_back();
+
+  assert(Lists.back().EntryOffset <= Entries.size() &&
+         "Popped off more entries than are in the list");
+}
+
+DebugLocStream::ListBuilder::~ListBuilder() {
+  if (!Locs.finalizeList(Asm))
+    return;
+  V.initializeDbgValue(&MI);
+  V.setDebugLocListIndex(ListIndex);
+}
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 1ae385d..3656e9d 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -15,7 +15,11 @@
 #include "ByteStreamer.h"
 
 namespace llvm {
+
+class AsmPrinter;
+class DbgVariable;
 class DwarfCompileUnit;
+class MachineInstr;
 class MCSymbol;
 
 /// \brief Byte stream of .debug_loc entries.
@@ -29,10 +33,10 @@ class DebugLocStream {
 public:
   struct List {
     DwarfCompileUnit *CU;
-    MCSymbol *Label;
+    MCSymbol *Label = nullptr;
     size_t EntryOffset;
-    List(DwarfCompileUnit *CU, MCSymbol *Label, size_t EntryOffset)
-        : CU(CU), Label(Label), EntryOffset(EntryOffset) {}
+    List(DwarfCompileUnit *CU, size_t EntryOffset)
+        : CU(CU), EntryOffset(EntryOffset) {}
   };
   struct Entry {
     const MCSymbol *BeginSym;
@@ -61,18 +65,30 @@ public:
   const List &getList(size_t LI) const { return Lists[LI]; }
   ArrayRef<List> getLists() const { return Lists; }
 
+  class ListBuilder;
+  class EntryBuilder;
+
+private:
   /// \brief Start a new .debug_loc entry list.
   ///
   /// Start a new .debug_loc entry list.  Return the new list's index so it can
   /// be retrieved later via \a getList().
   ///
   /// Until the next call, \a startEntry() will add entries to this list.
-  size_t startList(DwarfCompileUnit *CU, MCSymbol *Label) {
+  size_t startList(DwarfCompileUnit *CU) {
     size_t LI = Lists.size();
-    Lists.emplace_back(CU, Label, Entries.size());
+    Lists.emplace_back(CU, Entries.size());
     return LI;
   }
 
+  /// Finalize a .debug_loc entry list.
+  ///
+  /// If there are no entries in this list, delete it outright.  Otherwise,
+  /// create a label with \a Asm.
+  ///
+  /// \return false iff the list is deleted.
+  bool finalizeList(AsmPrinter &Asm);
+
   /// \brief Start a new .debug_loc entry.
   ///
   /// Until the next call, bytes added to the stream will be added to this
@@ -81,6 +97,10 @@ public:
     Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size());
   }
 
+  /// Finalize a .debug_loc entry, deleting if it's empty.
+  void finalizeEntry();
+
+public:
   BufferByteStreamer getStreamer() {
     return BufferByteStreamer(DWARFBytes, Comments, GenerateComments);
   }
@@ -129,5 +149,45 @@ private:
     return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset;
   }
 };
+
+/// Builder for DebugLocStream lists.
+class DebugLocStream::ListBuilder {
+  DebugLocStream &Locs;
+  AsmPrinter &Asm;
+  DbgVariable &V;
+  const MachineInstr &MI;
+  size_t ListIndex;
+
+public:
+  ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
+              DbgVariable &V, const MachineInstr &MI)
+      : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {}
+
+  /// Finalize the list.
+  ///
+  /// If the list is empty, delete it.  Otherwise, finalize it by creating a
+  /// temp symbol in \a Asm and setting up the \a DbgVariable.
+  ~ListBuilder();
+
+  DebugLocStream &getLocs() { return Locs; }
+};
+
+/// Builder for DebugLocStream entries.
+class DebugLocStream::EntryBuilder {
+  DebugLocStream &Locs;
+
+public:
+  EntryBuilder(ListBuilder &List, const MCSymbol *Begin, const MCSymbol *End)
+      : Locs(List.getLocs()) {
+    Locs.startEntry(Begin, End);
+  }
+
+  /// Finalize the entry, deleting it if it's empty.
+  ~EntryBuilder() { Locs.finalizeEntry(); }
+
+  BufferByteStreamer getStreamer() { return Locs.getStreamer(); }
+};
+
 } // namespace llvm
+
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index cc677c2..4d81441 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -252,5 +252,5 @@ public:
   void dump() { print(dbgs()); }
 #endif
 };
-} // namespace llvm
+}
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 45c56fb..fc54a29 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -42,7 +42,8 @@ void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
     DD->addArangeLabel(SymbolCU(this, Label));
 
   unsigned idx = DD->getAddressPool().getIndex(Label);
-  Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, DIEInteger(idx));
+  Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index,
+               DIEInteger(idx));
 }
 
 void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
@@ -52,9 +53,11 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
     DD->addArangeLabel(SymbolCU(this, Label));
 
   if (Label)
-    Die.addValue(Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
+    Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
+                 DIELabel(Label));
   else
-    Die.addValue(Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
+    Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
+                 DIEInteger(0));
 }
 
 unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
@@ -225,16 +228,15 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
   CURanges.back().setEnd(Range.getEnd());
 }
 
-void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
-                                       const MCSymbol *Label,
-                                       const MCSymbol *Sec) {
+DIE::value_iterator
+DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+                                  const MCSymbol *Label, const MCSymbol *Sec) {
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    addLabel(Die, Attribute,
-             DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
-                                        : dwarf::DW_FORM_data4,
-             Label);
-  else
-    addSectionDelta(Die, Attribute, Label, Sec);
+    return addLabel(Die, Attribute,
+                    DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+                                               : dwarf::DW_FORM_data4,
+                    Label);
+  return addSectionDelta(Die, Attribute, Label, Sec);
 }
 
 void DwarfCompileUnit::initStmtList() {
@@ -242,20 +244,19 @@ void DwarfCompileUnit::initStmtList() {
   MCSymbol *LineTableStartSym =
       Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
 
-  stmtListIndex = std::distance(UnitDie.values_begin(), UnitDie.values_end());
-
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section. For split dwarf this is
   // left in the skeleton CU and so not included.
   // The line table entries are not always emitted in assembly, so it
   // is not okay to use line_table_start here.
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-  addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
-                  TLOF.getDwarfLineSection()->getBeginSymbol());
+  StmtListValue =
+      addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
+                      TLOF.getDwarfLineSection()->getBeginSymbol());
 }
 
 void DwarfCompileUnit::applyStmtList(DIE &D) {
-  D.addValue(UnitDie.values_begin()[stmtListIndex]);
+  D.addValue(DIEValueAllocator, *StmtListValue);
 }
 
 void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
@@ -300,7 +301,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
 
 // Construct a DIE for this scope.
 void DwarfCompileUnit::constructScopeDIE(
-    LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren) {
+    LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
   if (!Scope || !Scope->getScopeNode())
     return;
 
@@ -311,12 +312,12 @@ void DwarfCompileUnit::constructScopeDIE(
          "constructSubprogramScopeDIE for non-inlined "
          "subprograms");
 
-  SmallVector<std::unique_ptr<DIE>, 8> Children;
+  SmallVector<DIE *, 8> Children;
 
   // We try to create the scope DIE first, then the children DIEs. This will
   // avoid creating un-used children then removing them later when we find out
   // the scope DIE is null.
-  std::unique_ptr<DIE> ScopeDIE;
+  DIE *ScopeDIE;
   if (Scope->getParent() && isa<DISubprogram>(DS)) {
     ScopeDIE = constructInlinedScopeDIE(Scope);
     if (!ScopeDIE)
@@ -361,11 +362,13 @@ void DwarfCompileUnit::constructScopeDIE(
   FinalChildren.push_back(std::move(ScopeDIE));
 }
 
-void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
-                                       const MCSymbol *Hi, const MCSymbol *Lo) {
-  Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
-                                                     : dwarf::DW_FORM_data4,
-               new (DIEValueAllocator) DIEDelta(Hi, Lo));
+DIE::value_iterator
+DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+                                  const MCSymbol *Hi, const MCSymbol *Lo) {
+  return Die.addValue(DIEValueAllocator, Attribute,
+                      DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+                                                 : dwarf::DW_FORM_data4,
+                      new (DIEValueAllocator) DIEDelta(Hi, Lo));
 }
 
 void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
@@ -413,8 +416,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
 
 // This scope represents inlined body of a function. Construct DIE to
 // represent this concrete inlined copy of the function.
-std::unique_ptr<DIE>
-DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
+DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
   assert(Scope->getScopeNode());
   auto *DS = Scope->getScopeNode();
   auto *InlinedSP = getDISubprogram(DS);
@@ -423,7 +425,7 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
   DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
   assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
 
-  auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine);
+  auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
   addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
 
   attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
@@ -443,12 +445,11 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
 
 // Construct new DW_TAG_lexical_block for this scope and attach
 // DW_AT_low_pc/DW_AT_high_pc labels.
-std::unique_ptr<DIE>
-DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
+DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
   if (DD->isLexicalScopeDIENull(Scope))
     return nullptr;
 
-  auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block);
+  auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
   if (Scope->isAbstractScope())
     return ScopeDIE;
 
@@ -458,18 +459,16 @@ DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
 }
 
 /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
-                                                            bool Abstract) {
+DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
   auto D = constructVariableDIEImpl(DV, Abstract);
   DV.setDIE(*D);
   return D;
 }
 
-std::unique_ptr<DIE>
-DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
-                                           bool Abstract) {
+DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
+                                                bool Abstract) {
   // Define variable debug information entry.
-  auto VariableDie = make_unique<DIE>(DV.getTag());
+  auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
 
   if (Abstract) {
     applyVariableAttributes(DV, *VariableDie);
@@ -508,7 +507,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
   }
 
   // .. else use frame index.
-  if (DV.getFrameIndex().back() == ~0)
+  if (DV.getFrameIndex().empty())
     return VariableDie;
 
   auto Expr = DV.getExpression().begin();
@@ -529,17 +528,18 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
   return VariableDie;
 }
 
-std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(
-    DbgVariable &DV, const LexicalScope &Scope, DIE *&ObjectPointer) {
+DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
+                                            const LexicalScope &Scope,
+                                            DIE *&ObjectPointer) {
   auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
   if (DV.isObjectPointer())
-    ObjectPointer = Var.get();
+    ObjectPointer = Var;
   return Var;
 }
 
-DIE *DwarfCompileUnit::createScopeChildrenDIE(
-    LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &Children,
-    unsigned *ChildScopeCount) {
+DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
+                                              SmallVectorImpl<DIE *> &Children,
+                                              unsigned *ChildScopeCount) {
   DIE *ObjectPointer = nullptr;
 
   for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
@@ -580,13 +580,14 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
   // variadic function.
   if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
       !includeMinimalInlineScopes())
-    ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
+    ScopeDIE.addChild(
+        DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
 }
 
 DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
                                                  DIE &ScopeDIE) {
   // We create children when the scope DIE is not null.
-  SmallVector<std::unique_ptr<DIE>, 8> Children;
+  SmallVector<DIE *, 8> Children;
   DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
 
   // Add children
@@ -629,14 +630,16 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
     addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
 }
 
-std::unique_ptr<DIE>
-DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity *Module) {
-  std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module->getTag());
-  insertDIE(Module, IMDie.get());
+DIE *DwarfCompileUnit::constructImportedEntityDIE(
+    const DIImportedEntity *Module) {
+  DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
+  insertDIE(Module, IMDie);
   DIE *EntityDie;
   auto *Entity = resolve(Module->getEntity());
   if (auto *NS = dyn_cast<DINamespace>(Entity))
     EntityDie = getOrCreateNameSpace(NS);
+  else if (auto *M = dyn_cast<DIModule>(Entity))
+    EntityDie = getOrCreateModule(M);
   else if (auto *SP = dyn_cast<DISubprogram>(Entity))
     EntityDie = getOrCreateSubprogramDIE(SP);
   else if (auto *T = dyn_cast<DIType>(Entity))
@@ -686,7 +689,7 @@ void DwarfCompileUnit::collectDeadVariables(const DISubprogram *SP) {
     SPDIE = getDIE(SP);
   assert(SPDIE);
   for (const DILocalVariable *DV : Variables) {
-    DbgVariable NewVar(DV, /* IA */ nullptr, /* Expr */ nullptr, DD);
+    DbgVariable NewVar(DV, /* IA */ nullptr, DD);
     auto VariableDie = constructVariableDIE(NewVar);
     applyVariableAttributes(NewVar, *VariableDie);
     SPDIE->addChild(std::move(VariableDie));
@@ -725,7 +728,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
 /// DbgVariable based on provided MachineLocation.
 void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
                                           MachineLocation Location) {
-  if (DV.variableHasComplexAddress())
+  if (DV.hasComplexAddress())
     addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
   else if (DV.isBlockByrefVariable())
     addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
@@ -781,7 +784,7 @@ void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
                                        unsigned Index) {
   dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
                                                 : dwarf::DW_FORM_data4;
-  Die.addValue(Attribute, Form, DIELocList(Index));
+  Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
 }
 
 void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
@@ -798,7 +801,7 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
 /// Add a Dwarf expression attribute data and value.
 void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
                                const MCExpr *Expr) {
-  Die.addValue((dwarf::Attribute)0, Form, DIEExpr(Expr));
+  Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
 }
 
 void DwarfCompileUnit::applySubprogramAttributesToDefinition(
@@ -817,4 +820,4 @@ bool DwarfCompileUnit::includeMinimalInlineScopes() const {
   return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly ||
          (DD->useSplitDwarf() && !Skeleton);
 }
-} // namespace llvm
+} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 48c302b..509c943 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -31,7 +31,7 @@ class LexicalScope;
 class DwarfCompileUnit : public DwarfUnit {
   /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
   /// the need to search for it in applyStmtList.
-  unsigned stmtListIndex;
+  DIE::value_iterator StmtListValue;
 
   /// Skeleton unit associated with this unit.
   DwarfCompileUnit *Skeleton;
@@ -58,8 +58,7 @@ class DwarfCompileUnit : public DwarfUnit {
 
   /// \brief Construct a DIE for the given DbgVariable without initializing the
   /// DbgVariable's DIE reference.
-  std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV,
-                                                bool Abstract);
+  DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
 
   bool isDwoUnit() const override;
 
@@ -92,8 +91,8 @@ public:
                             const MCSymbol *Label);
 
   /// addSectionDelta - Add a label delta attribute data and value.
-  void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
-                       const MCSymbol *Lo);
+  DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+                                      const MCSymbol *Hi, const MCSymbol *Lo);
 
   DwarfCompileUnit &getCU() override { return *this; }
 
@@ -106,8 +105,9 @@ public:
 
   /// addSectionLabel - Add a Dwarf section label attribute data and value.
   ///
-  void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
-                       const MCSymbol *Label, const MCSymbol *Sec);
+  DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+                                      const MCSymbol *Label,
+                                      const MCSymbol *Sec);
 
   /// \brief Find DIE for the given subprogram and attach appropriate
   /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
@@ -116,7 +116,7 @@ public:
   DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
 
   void constructScopeDIE(LexicalScope *Scope,
-                         SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren);
+                         SmallVectorImpl<DIE *> &FinalChildren);
 
   /// \brief A helper function to construct a RangeSpanList for a given
   /// lexical scope.
@@ -128,23 +128,21 @@ public:
                                const SmallVectorImpl<InsnRange> &Ranges);
   /// \brief This scope represents inlined body of a function. Construct
   /// DIE to represent this concrete inlined copy of the function.
-  std::unique_ptr<DIE> constructInlinedScopeDIE(LexicalScope *Scope);
+  DIE *constructInlinedScopeDIE(LexicalScope *Scope);
 
   /// \brief Construct new DW_TAG_lexical_block for this scope and
   /// attach DW_AT_low_pc/DW_AT_high_pc labels.
-  std::unique_ptr<DIE> constructLexicalScopeDIE(LexicalScope *Scope);
+  DIE *constructLexicalScopeDIE(LexicalScope *Scope);
 
   /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-  std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
-                                            bool Abstract = false);
+  DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
 
-  std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
-                                            const LexicalScope &Scope,
-                                            DIE *&ObjectPointer);
+  DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
+                            DIE *&ObjectPointer);
 
   /// A helper function to create children of a Scope DIE.
   DIE *createScopeChildrenDIE(LexicalScope *Scope,
-                              SmallVectorImpl<std::unique_ptr<DIE>> &Children,
+                              SmallVectorImpl<DIE *> &Children,
                               unsigned *ChildScopeCount = nullptr);
 
   /// \brief Construct a DIE for this subprogram scope.
@@ -155,8 +153,7 @@ public:
   void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
 
   /// \brief Construct import_module DIE.
-  std::unique_ptr<DIE>
-  constructImportedEntityDIE(const DIImportedEntity *Module);
+  DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
 
   void finishSubprogramDefinition(const DISubprogram *SP);
 
@@ -231,6 +228,6 @@ public:
   const MCSymbol *getBaseAddress() const { return BaseAddress; }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index fb33169..7d03a39 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -678,8 +678,7 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
 
 void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
                                         LexicalScope *Scope) {
-  auto AbsDbgVariable =
-      make_unique<DbgVariable>(Var, /* IA */ nullptr, /* Expr */ nullptr, this);
+  auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr, this);
   InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
   AbstractVariables[Var] = std::move(AbsDbgVariable);
 }
@@ -722,10 +721,9 @@ void DwarfDebug::collectVariableInfoFromMMITable(
     if (!Scope)
       continue;
 
-    const DIExpression *Expr = cast_or_null<DIExpression>(VI.Expr);
     ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
-    auto RegVar =
-        make_unique<DbgVariable>(Var.first, Var.second, Expr, this, VI.Slot);
+    auto RegVar = make_unique<DbgVariable>(Var.first, Var.second, this);
+    RegVar->initializeMMI(VI.Expr, VI.Slot);
     if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
       ConcreteVariables.push_back(std::move(RegVar));
   }
@@ -870,6 +868,14 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
   }
 }
 
+DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope,
+                                                InlinedVariable IV) {
+  ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode());
+  ConcreteVariables.push_back(
+      make_unique<DbgVariable>(IV.first, IV.second, this));
+  InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
+  return ConcreteVariables.back().get();
+}
 
 // Find variables for each lexical scope.
 void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
@@ -898,20 +904,19 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
       continue;
 
     Processed.insert(IV);
+    DbgVariable *RegVar = createConcreteVariable(*Scope, IV);
+
     const MachineInstr *MInsn = Ranges.front().first;
     assert(MInsn->isDebugValue() && "History must begin with debug value");
-    ensureAbstractVariableIsCreatedIfScoped(IV, Scope->getScopeNode());
-    ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
-    DbgVariable *RegVar = ConcreteVariables.back().get();
-    InfoHolder.addScopeVariable(Scope, RegVar);
 
     // Check if the first DBG_VALUE is valid for the rest of the function.
-    if (Ranges.size() == 1 && Ranges.front().second == nullptr)
+    if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
+      RegVar->initializeDbgValue(MInsn);
       continue;
+    }
 
     // Handle multiple DBG_VALUE instructions describing one variable.
-    RegVar->setDebugLocListIndex(
-        DebugLocs.startList(&TheCU, Asm->createTempSymbol("debug_loc")));
+    DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
 
     // Build the location list for this variable.
     SmallVector<DebugLocEntry, 8> Entries;
@@ -925,20 +930,14 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
 
     // Finalize the entry by lowering it into a DWARF bytestream.
     for (auto &Entry : Entries)
-      Entry.finalize(*Asm, DebugLocs, BT);
+      Entry.finalize(*Asm, List, BT);
   }
 
   // Collect info for variables that were optimized out.
   for (const DILocalVariable *DV : SP->getVariables()) {
-    if (!Processed.insert(InlinedVariable(DV, nullptr)).second)
-      continue;
-    if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) {
-      ensureAbstractVariableIsCreatedIfScoped(InlinedVariable(DV, nullptr),
-                                              Scope->getScopeNode());
-      ConcreteVariables.push_back(make_unique<DbgVariable>(
-          DV, /* IA */ nullptr, /* Expr */ nullptr, this));
-      InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get());
-    }
+    if (Processed.insert(InlinedVariable(DV, nullptr)).second)
+      if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
+        createConcreteVariable(*Scope, InlinedVariable(DV, nullptr));
   }
 }
 
@@ -1505,10 +1504,11 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
   // FIXME: ^
 }
 
-void DebugLocEntry::finalize(const AsmPrinter &AP, DebugLocStream &Locs,
+void DebugLocEntry::finalize(const AsmPrinter &AP,
+                             DebugLocStream::ListBuilder &List,
                              const DIBasicType *BT) {
-  Locs.startEntry(Begin, End);
-  BufferByteStreamer Streamer = Locs.getStreamer();
+  DebugLocStream::EntryBuilder Entry(List, Begin, End);
+  BufferByteStreamer Streamer = Entry.getStreamer();
   const DebugLocEntry::Value &Value = Values[0];
   if (Value.isBitPiece()) {
     // Emit all pieces that belong to the same variable and range.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 700f736..1c3e2ae 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -67,42 +67,61 @@ public:
 };
 
 //===----------------------------------------------------------------------===//
-/// \brief This class is used to track local variable information.
+/// This class is used to track local variable information.
 ///
-/// - Variables whose location changes over time have a DebugLocListIndex and
-///   the other fields are not used.
+/// Variables can be created from allocas, in which case they're generated from
+/// the MMI table.  Such variables can have multiple expressions and frame
+/// indices.  The \a Expr and \a FrameIndices array must match.
 ///
-/// - Variables that are described by multiple MMI table entries have multiple
-///   expressions and frame indices.
+/// Variables can be created from \c DBG_VALUE instructions.  Those whose
+/// location changes over time use \a DebugLocListIndex, while those with a
+/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
+///
+/// Variables that have been optimized out use none of these fields.
 class DbgVariable {
-  const DILocalVariable *Var; /// Variable Descriptor.
-  const DILocation *IA;       /// Inlined at location.
-  SmallVector<const DIExpression *, 1>
-      Expr;                          /// Complex address location expression.
-  DIE *TheDIE;                /// Variable DIE.
-  unsigned DebugLocListIndex;        /// Offset in DebugLocs.
-  const MachineInstr *MInsn;  /// DBG_VALUE instruction of the variable.
-  SmallVector<int, 1> FrameIndex; /// Frame index of the variable.
+  const DILocalVariable *Var;                /// Variable Descriptor.
+  const DILocation *IA;                      /// Inlined at location.
+  SmallVector<const DIExpression *, 1> Expr; /// Complex address.
+  DIE *TheDIE = nullptr;                     /// Variable DIE.
+  unsigned DebugLocListIndex = ~0u;          /// Offset in DebugLocs.
+  const MachineInstr *MInsn = nullptr;       /// DBG_VALUE instruction.
+  SmallVector<int, 1> FrameIndex;            /// Frame index.
   DwarfDebug *DD;
 
 public:
-  /// Construct a DbgVariable from a variable.
-  DbgVariable(const DILocalVariable *V, const DILocation *IA,
-              const DIExpression *E, DwarfDebug *DD, int FI = ~0)
-      : Var(V), IA(IA), Expr(1, E), TheDIE(nullptr), DebugLocListIndex(~0U),
-        MInsn(nullptr), DD(DD) {
+  /// Construct a DbgVariable.
+  ///
+  /// Creates a variable without any DW_AT_location.  Call \a initializeMMI()
+  /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
+  DbgVariable(const DILocalVariable *V, const DILocation *IA, DwarfDebug *DD)
+      : Var(V), IA(IA), DD(DD) {}
+
+  /// Initialize from the MMI table.
+  void initializeMMI(const DIExpression *E, int FI) {
+    assert(Expr.empty() && "Already initialized?");
+    assert(FrameIndex.empty() && "Already initialized?");
+    assert(!MInsn && "Already initialized?");
+
+    assert((!E || E->isValid()) && "Expected valid expression");
+    assert(~FI && "Expected valid index");
+
+    Expr.push_back(E);
     FrameIndex.push_back(FI);
-    assert(!E || E->isValid());
   }
 
-  /// Construct a DbgVariable from a DEBUG_VALUE.
-  /// AbstractVar may be NULL.
-  DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
-      : Var(DbgValue->getDebugVariable()),
-        IA(DbgValue->getDebugLoc()->getInlinedAt()),
-        Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr),
-        DebugLocListIndex(~0U), MInsn(DbgValue), DD(DD) {
-    FrameIndex.push_back(~0);
+  /// Initialize from a DBG_VALUE instruction.
+  void initializeDbgValue(const MachineInstr *DbgValue) {
+    assert(Expr.empty() && "Already initialized?");
+    assert(FrameIndex.empty() && "Already initialized?");
+    assert(!MInsn && "Already initialized?");
+
+    assert(Var == DbgValue->getDebugVariable() && "Wrong variable");
+    assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at");
+
+    MInsn = DbgValue;
+    if (auto *E = DbgValue->getDebugExpression())
+      if (E->getNumElements())
+        Expr.push_back(E);
   }
 
   // Accessors.
@@ -123,17 +142,16 @@ public:
     assert(V.Var == Var && "conflicting variable");
     assert(V.IA == IA && "conflicting inlined-at location");
 
-    if (V.getFrameIndex().back() != ~0) {
-      auto E = V.getExpression();
-      auto FI = V.getFrameIndex();
-      Expr.append(E.begin(), E.end());
-      FrameIndex.append(FI.begin(), FI.end());
-    }
-    assert(Expr.size() > 1 ? std::all_of(Expr.begin(), Expr.end(),
-                                         [](const DIExpression *E) {
-                                           return E->isBitPiece();
-                                         })
-                           : (true && "conflicting locations for variable"));
+    assert(!FrameIndex.empty() && "Expected an MMI entry");
+    assert(!V.FrameIndex.empty() && "Expected an MMI entry");
+    assert(Expr.size() == FrameIndex.size() && "Mismatched expressions");
+    assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions");
+
+    Expr.append(V.Expr.begin(), V.Expr.end());
+    FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end());
+    assert(std::all_of(Expr.begin(), Expr.end(), [](const DIExpression *E) {
+             return E && E->isBitPiece();
+           }) && "conflicting locations for variable");
   }
 
   // Translate tag to proper Dwarf tag.
@@ -160,11 +178,13 @@ public:
     return false;
   }
 
-  bool variableHasComplexAddress() const {
-    assert(Var && "Invalid complex DbgVariable!");
-    assert(Expr.size() == 1 &&
-           "variableHasComplexAddress() invoked on multi-FI variable");
-    return Expr.back()->getNumElements() > 0;
+  bool hasComplexAddress() const {
+    assert(MInsn && "Expected DBG_VALUE, not MMI variable");
+    assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable");
+    assert(
+        (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) &&
+        "Invalid Expr for DBG_VALUE");
+    return !Expr.empty();
   }
   bool isBlockByrefVariable() const;
   const DIType *getType() const;
@@ -344,6 +364,8 @@ class DwarfDebug : public AsmPrinterHandler {
   void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var,
                                                const MDNode *Scope);
 
+  DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV);
+
   /// \brief Construct a DIE for this abstract scope.
   void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index a4fd36f..f4667b4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -21,7 +21,7 @@ namespace llvm {
 class MachineFunction;
 class ARMTargetStreamer;
 
-class DwarfCFIExceptionBase : public EHStreamer {
+class LLVM_LIBRARY_VISIBILITY DwarfCFIExceptionBase : public EHStreamer {
 protected:
   DwarfCFIExceptionBase(AsmPrinter *A);
 
@@ -31,7 +31,7 @@ protected:
   void markFunctionEnd() override;
 };
 
-class DwarfCFIException : public DwarfCFIExceptionBase {
+class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
   /// Per-function flag to indicate if .cfi_personality should be emitted.
   bool shouldEmitPersonality;
 
@@ -61,7 +61,7 @@ public:
   void endFunction(const MachineFunction *) override;
 };
 
-class ARMException : public DwarfCFIExceptionBase {
+class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase {
   void emitTypeInfos(unsigned TTypeEncoding) override;
   ARMTargetStreamer &getTargetStreamer();
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 154d7d9..78ec937 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -131,6 +131,6 @@ public:
   void EmitUnsigned(uint64_t Value) override;
   bool isFrameRegister(unsigned MachineReg) override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index fdefb1d..51b27b4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -103,7 +103,7 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
   // Size the DIE attribute values.
   for (const auto &V : Die.values())
     // Size attribute value.
-    Offset += V.SizeOf(Asm, V.getForm());
+    Offset += V.SizeOf(Asm);
 
   // Size the DIE children if any.
   if (Die.hasChildren()) {
@@ -111,7 +111,7 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
     assert(Abbrev.hasChildren() && "Children flag not set");
 
     for (auto &Child : Die.children())
-      Offset = computeSizeAndOffset(*Child, Offset);
+      Offset = computeSizeAndOffset(Child, Offset);
 
     // End of children marker.
     Offset += sizeof(int8_t);
@@ -170,4 +170,4 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
   Vars.push_back(Var);
   return true;
 }
-} // namespace llvm
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 22759fd..8402027 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -114,5 +114,5 @@ public:
     return DITypeNodeToDieMap.lookup(TypeMD);
   }
 };
-} // namespace llvm
+}
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index c107258..93a1684 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -45,5 +45,5 @@ public:
   /// Get a reference to an entry in the string pool.
   EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
 };
-} // namespace llvm
+}
 #endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index f4b15ba..3555822 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -66,8 +66,9 @@ bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) {
 DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag,
                      const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,
                      DwarfFile *DWU)
-    : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A),
-      DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
+    : UniqueID(UID), CUNode(Node),
+      UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), DebugInfoOffset(0),
+      Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
   assert(UnitTag == dwarf::DW_TAG_compile_unit ||
          UnitTag == dwarf::DW_TAG_type_unit);
 }
@@ -184,16 +185,18 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
 
 void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
   if (DD->getDwarfVersion() >= 4)
-    Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEInteger(1));
+    Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present,
+                 DIEInteger(1));
   else
-    Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEInteger(1));
+    Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag,
+                 DIEInteger(1));
 }
 
 void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, uint64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(false, Integer);
-  Die.addValue(Attribute, *Form, DIEInteger(Integer));
+  Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
 }
 
 void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
@@ -204,7 +207,7 @@ void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, int64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(true, Integer);
-  Die.addValue(Attribute, *Form, DIEInteger(Integer));
+  Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
 }
 
 void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
@@ -214,14 +217,15 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
 
 void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
                           StringRef String) {
-  Die.addValue(Attribute,
+  Die.addValue(DIEValueAllocator, Attribute,
                isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
                DIEString(DU->getStringPool().getEntry(*Asm, String)));
 }
 
-void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
-                         const MCSymbol *Label) {
-  Die.addValue(Attribute, Form, DIELabel(Label));
+DIE::value_iterator DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute,
+                                        dwarf::Form Form,
+                                        const MCSymbol *Label) {
+  return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label));
 }
 
 void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
@@ -254,7 +258,7 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
 
 void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
                               const MCSymbol *Hi, const MCSymbol *Lo) {
-  Die.addValue(Attribute, dwarf::DW_FORM_data4,
+  Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_data4,
                new (DIEValueAllocator) DIEDelta(Hi, Lo));
 }
 
@@ -269,8 +273,8 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
   // and think this is a full definition.
   addFlag(Die, dwarf::DW_AT_declaration);
 
-  Die.addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
-               DIETypeSignature(Type));
+  Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature,
+               dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type));
 }
 
 void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
@@ -282,7 +286,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
     DieCU = &getUnitDie();
   if (!EntryCU)
     EntryCU = &getUnitDie();
-  Die.addValue(Attribute,
+  Die.addValue(DIEValueAllocator, Attribute,
                EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
                Entry);
 }
@@ -290,7 +294,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
 DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
   assert(Tag != dwarf::DW_TAG_auto_variable &&
          Tag != dwarf::DW_TAG_arg_variable);
-  DIE &Die = Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
+  DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag));
   if (N)
     insertDIE(N, &Die);
   return Die;
@@ -299,14 +303,15 @@ DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
 void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
   Loc->ComputeSize(Asm);
   DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
-  Die.addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
+  Die.addValue(DIEValueAllocator, Attribute,
+               Loc->BestForm(DD->getDwarfVersion()), Loc);
 }
 
 void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
                          DIEBlock *Block) {
   Block->ComputeSize(Asm);
   DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
-  Die.addValue(Attribute, Block->BestForm(), Block);
+  Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block);
 }
 
 void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
@@ -1064,6 +1069,30 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
   return &NDie;
 }
 
+DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
+  // Construct the context before querying for the existence of the DIE in case
+  // such construction creates the DIE.
+  DIE *ContextDIE = getOrCreateContextDIE(M->getScope());
+
+  if (DIE *MDie = getDIE(M))
+    return MDie;
+  DIE &MDie = createAndAddDIE(dwarf::DW_TAG_module, *ContextDIE, M);
+
+  if (!M->getName().empty()) {
+    addString(MDie, dwarf::DW_AT_name, M->getName());
+    addGlobalName(M->getName(), MDie, M->getScope());
+  }
+  if (!M->getConfigurationMacros().empty())
+    addString(MDie, dwarf::DW_AT_LLVM_config_macros,
+              M->getConfigurationMacros());
+  if (!M->getIncludePath().empty())
+    addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
+  if (!M->getISysRoot().empty())
+    addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot());
+  
+  return &MDie;
+}
+
 DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
   // Construct the context before querying for the existence of the DIE in case
   // such construction creates the DIE (as is the case for member function
@@ -1340,24 +1369,44 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
       // Handle bitfield, assume bytes are 8 bits.
       addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
       addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
-
+      //
+      // The DWARF 2 DW_AT_bit_offset is counting the bits between the most
+      // significant bit of the aligned storage unit containing the bit field to
+      // the most significan bit of the bit field.
+      //
+      // FIXME: DWARF 4 states that DW_AT_data_bit_offset (which
+      // counts from the beginning, regardless of endianness) should
+      // be used instead.
+      //
+      //
+      // Struct      Align       Align       Align
+      // v           v           v           v
+      // +-----------+-----*-----+-----*-----+--
+      // | ...             |b1|b2|b3|b4|
+      // +-----------+-----*-----+-----*-----+--
+      // |           |     |<-- Size ->|     |
+      // |<---- Offset --->|           |<--->|
+      // |           |     |              \_ DW_AT_bit_offset (little endian)
+      // |           |<--->|
+      // |<--------->|  \_ StartBitOffset = DW_AT_bit_offset (big endian)
+      //     \                            = DW_AT_data_bit_offset (biendian)
+      //      \_ OffsetInBytes
       uint64_t Offset = DT->getOffsetInBits();
-      uint64_t AlignMask = ~(DT->getAlignInBits() - 1);
-      uint64_t HiMark = (Offset + FieldSize) & AlignMask;
-      uint64_t FieldOffset = (HiMark - FieldSize);
-      Offset -= FieldOffset;
-
-      // Maybe we need to work from the other end.
-      if (Asm->getDataLayout().isLittleEndian())
-        Offset = FieldSize - (Offset + Size);
-      addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
-
-      // Here DW_AT_data_member_location points to the anonymous
-      // field that includes this bit field.
-      OffsetInBytes = FieldOffset >> 3;
+      uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize;
+      uint64_t AlignMask = ~(Align - 1);
+      // The bits from the start of the storage unit to the start of the field.
+      uint64_t StartBitOffset = Offset - (Offset & AlignMask);
+      // The endian-dependent DWARF 2 offset.
+      uint64_t DwarfBitOffset = Asm->getDataLayout().isLittleEndian()
+        ? OffsetToAlignment(Offset + Size, Align)
+        : StartBitOffset;
+
+      // The byte offset of the field's aligned storage unit inside the struct.
+      OffsetInBytes = (Offset - StartBitOffset) / 8;
+      addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, DwarfBitOffset);
     } else
       // This is not a bitfield.
-      OffsetInBytes = DT->getOffsetInBits() >> 3;
+      OffsetInBytes = DT->getOffsetInBits() / 8;
 
     if (DD->getDwarfVersion() <= 2) {
       DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc;
@@ -1386,8 +1435,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
   // Objective-C properties.
   if (DINode *PNode = DT->getObjCProperty())
     if (DIE *PDie = getDIE(PNode))
-      MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
-                         DIEEntry(*PDie));
+      MemberDie.addValue(DIEValueAllocator, dwarf::DW_AT_APPLE_property,
+                         dwarf::DW_FORM_ref4, DIEEntry(*PDie));
 
   if (DT->isArtificial())
     addFlag(MemberDie, dwarf::DW_AT_artificial);
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 200ddf0..4000ae4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -73,8 +73,11 @@ protected:
   /// MDNode for the compile unit.
   const DICompileUnit *CUNode;
 
+  // All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+
   /// Unit debug information entry.
-  DIE UnitDie;
+  DIE &UnitDie;
 
   /// Offset of the UnitDie from beginning of debug info section.
   unsigned DebugInfoOffset;
@@ -104,9 +107,6 @@ protected:
   /// corresponds to the MDNode mapped with the subprogram DIE.
   DenseMap<DIE *, const DINode *> ContainingTypeMap;
 
-  // All DIEValues are allocated through this allocator.
-  BumpPtrAllocator DIEValueAllocator;
-
   /// The section this unit will be emitted in.
   MCSection *Section;
 
@@ -206,8 +206,8 @@ public:
   void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
 
   /// \brief Add a Dwarf label attribute data and value.
-  void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
-                const MCSymbol *Label);
+  DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute,
+                               dwarf::Form Form, const MCSymbol *Label);
 
   void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
 
@@ -291,6 +291,7 @@ public:
                dwarf::Attribute Attribute = dwarf::DW_AT_type);
 
   DIE *getOrCreateNameSpace(const DINamespace *NS);
+  DIE *getOrCreateModule(const DIModule *M);
   DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
 
   void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
@@ -402,5 +403,5 @@ public:
   }
   DwarfCompileUnit &getCU() override { return CU; }
 };
-} // namespace llvm
+} // end llvm namespace
 #endif
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 128a8ad..e42e082 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -30,7 +30,7 @@ template <typename T>
 class SmallVectorImpl;
 
 /// Emits exception handling directives.
-class EHStreamer : public AsmPrinterHandler {
+class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler {
 protected:
   /// Target of directive emission.
   AsmPrinter *Asm;
@@ -132,7 +132,7 @@ public:
   void beginInstruction(const MachineInstr *MI) override {}
   void endInstruction() override {}
 };
-} // namespace llvm
+}
 
 #endif
 
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 802456b..2ceec61 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -58,7 +58,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
   SymName[Letter] = toupper(SymName[Letter]);
 
   SmallString<128> TmpStr;
-  AP.Mang->getNameWithPrefix(TmpStr, SymName);
+  Mangler::getNameWithPrefix(TmpStr, SymName, M.getDataLayout());
 
   MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr);
 
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 11bfe76..535b1f6 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -378,4 +378,4 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
     return;
   maybeRecordLocation(DL, Asm->MF);
 }
-} // namespace llvm
+}
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index c66d141..a5b399f 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -29,7 +29,7 @@
 
 namespace llvm {
 /// \brief Collects and handles line tables information in a CodeView format.
-class WinCodeViewLineTables : public AsmPrinterHandler {
+class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler {
   AsmPrinter *Asm;
   DebugLoc PrevInstLoc;
 
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 1ba6060..79830bc 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -319,6 +319,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
       return;
   } else {
     FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName);
+    emitEHRegistrationOffsetLabel(FuncInfo, ParentLinkageName);
   }
 
   MCSymbol *UnwindMapXData = nullptr;
@@ -547,28 +548,33 @@ void WinException::extendIP2StateTable(const MachineFunction *MF,
   }
 }
 
+void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
+                                                 StringRef FLinkageName) {
+  // Outlined helpers called by the EH runtime need to know the offset of the EH
+  // registration in order to recover the parent frame pointer. Now that we know
+  // we've code generated the parent, we can emit the label assignment that
+  // those helpers use to get the offset of the registration node.
+  assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX &&
+         "no EH reg node frameescape index");
+  MCSymbol *ParentFrameOffset =
+      Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+  MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol(
+      FLinkageName, FuncInfo.EHRegNodeEscapeIndex);
+  const MCExpr *RegistrationOffsetSymRef =
+      MCSymbolRefExpr::create(RegistrationOffsetSym, Asm->OutContext);
+  Asm->OutStreamer->EmitAssignment(ParentFrameOffset, RegistrationOffsetSymRef);
+}
+
 /// Emit the language-specific data that _except_handler3 and 4 expect. This is
 /// functionally equivalent to the __C_specific_handler table, except it is
 /// indexed by state number instead of IP.
 void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
   MCStreamer &OS = *Asm->OutStreamer;
-
-  // Define the EH registration node offset label in terms of its frameescape
-  // label. The WinEHStatePass ensures that the registration node is passed to
-  // frameescape. This allows SEH filter functions to access the
-  // EXCEPTION_POINTERS field, which is filled in by the _except_handlerN.
   const Function *F = MF->getFunction();
-  WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F);
-  assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX &&
-         "no EH reg node frameescape index");
   StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());
-  MCSymbol *ParentFrameOffset =
-      Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName);
-  MCSymbol *FrameAllocSym = Asm->OutContext.getOrCreateFrameAllocSymbol(
-      FLinkageName, FuncInfo.EHRegNodeEscapeIndex);
-  const MCSymbolRefExpr *FrameAllocSymRef =
-      MCSymbolRefExpr::create(FrameAllocSym, Asm->OutContext);
-  OS.EmitAssignment(ParentFrameOffset, FrameAllocSymRef);
+
+  WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F);
+  emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
 
   // Emit the __ehtable label that we use for llvm.x86.seh.lsda.
   MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h
index bbff3c2..669c9cc 100644
--- a/lib/CodeGen/AsmPrinter/WinException.h
+++ b/lib/CodeGen/AsmPrinter/WinException.h
@@ -23,7 +23,7 @@ class MachineFunction;
 class MCExpr;
 struct WinEHFuncInfo;
 
-class WinException : public EHStreamer {
+class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
   /// Per-function flag to indicate if personality info should be emitted.
   bool shouldEmitPersonality = false;
 
@@ -50,6 +50,11 @@ class WinException : public EHStreamer {
   void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF,
                            WinEHFuncInfo &FuncInfo);
 
+  /// Emits the label used with llvm.x86.seh.recoverfp, which is used by
+  /// outlined funclets.
+  void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
+                                     StringRef FLinkageName);
+
   const MCExpr *create32bitRef(const MCSymbol *Value);
   const MCExpr *create32bitRef(const GlobalValue *GV);
 
@@ -70,7 +75,7 @@ public:
   /// Gather and emit post-function exception information.
   void endFunction(const MachineFunction *) override;
 };
-} // namespace llvm
+}
 
 #endif
 
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 0bb0fa3..530ab46 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -55,7 +55,7 @@ namespace {
     bool isIdempotentRMW(AtomicRMWInst *AI);
     bool simplifyIdempotentRMW(AtomicRMWInst *AI);
   };
-} // namespace
+}
 
 char AtomicExpand::ID = 0;
 char &llvm::AtomicExpandID = AtomicExpand::ID;
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index e7b7f5b..6182667 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -79,7 +79,7 @@ namespace {
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 char BranchFolderPass::ID = 0;
 char &llvm::BranchFolderPassID = BranchFolderPass::ID;
@@ -270,7 +270,9 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &Op = MI->getOperand(i);
 
-    // Merge in bits from the operand if easy.
+    // Merge in bits from the operand if easy. We can't use MachineOperand's
+    // hash_code here because it's not deterministic and we sort by hash value
+    // later.
     unsigned OperandHash = 0;
     switch (Op.getType()) {
     case MachineOperand::MO_Register:
@@ -304,17 +306,9 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
 
 /// HashEndOfMBB - Hash the last instruction in the MBB.
 static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
-  MachineBasicBlock::const_iterator I = MBB->end();
-  if (I == MBB->begin())
-    return 0; // Empty MBB.
-
-  --I;
-  // Skip debug info so it will not affect codegen.
-  while (I->isDebugValue()) {
-    if (I == MBB->begin())
-      return 0; // MBB empty except for debug info.
-    --I;
-  }
+  MachineBasicBlock::const_iterator I = MBB->getLastNonDebugInstr();
+  if (I == MBB->end())
+    return 0;
 
   return HashMachineInstr(I);
 }
@@ -1123,25 +1117,15 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
 // Blocks should be considered empty if they contain only debug info;
 // else the debug info would affect codegen.
 static bool IsEmptyBlock(MachineBasicBlock *MBB) {
-  if (MBB->empty())
-    return true;
-  for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
-       MBBI!=MBBE; ++MBBI) {
-    if (!MBBI->isDebugValue())
-      return false;
-  }
-  return true;
+  return MBB->getFirstNonDebugInstr() == MBB->end();
 }
 
 // Blocks with only debug info and branches should be considered the same
 // as blocks with only branches.
 static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
-  MachineBasicBlock::iterator MBBI, MBBE;
-  for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) {
-    if (!MBBI->isDebugValue())
-      break;
-  }
-  return (MBBI->isBranch());
+  MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr();
+  assert(I != MBB->end() && "empty block!");
+  return I->isBranch();
 }
 
 /// IsBetterFallthrough - Return true if it would be clearly better to
@@ -1154,36 +1138,24 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
   // MBB1 doesn't, we prefer to fall through into MBB1.  This allows us to
   // optimize branches that branch to either a return block or an assert block
   // into a fallthrough to the return.
-  if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
+  MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr();
+  MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr();
+  if (MBB1I == MBB1->end() || MBB2I == MBB2->end())
+    return false;
 
   // If there is a clear successor ordering we make sure that one block
   // will fall through to the next
   if (MBB1->isSuccessor(MBB2)) return true;
   if (MBB2->isSuccessor(MBB1)) return false;
 
-  // Neither block consists entirely of debug info (per IsEmptyBlock check),
-  // so we needn't test for falling off the beginning here.
-  MachineBasicBlock::iterator MBB1I = --MBB1->end();
-  while (MBB1I->isDebugValue())
-    --MBB1I;
-  MachineBasicBlock::iterator MBB2I = --MBB2->end();
-  while (MBB2I->isDebugValue())
-    --MBB2I;
   return MBB2I->isCall() && !MBB1I->isCall();
 }
 
 /// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
-/// instructions on the block. Always use the DebugLoc of the first
-/// branching instruction found unless its absent, in which case use the
-/// DebugLoc of the second if present.
+/// instructions on the block.
 static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
-    return DebugLoc();
-  --I;
-  while (I->isDebugValue() && I != MBB.begin())
-    --I;
-  if (I->isBranch())
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I != MBB.end() && I->isBranch())
     return I->getDebugLoc();
   return DebugLoc();
 }
@@ -1408,19 +1380,10 @@ ReoptimizeBlock:
       // If the only things remaining in the block are debug info, remove these
       // as well, so this will behave the same as an empty block in non-debug
       // mode.
-      if (!MBB->empty()) {
-        bool NonDebugInfoFound = false;
-        for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-             I != E; ++I) {
-          if (!I->isDebugValue()) {
-            NonDebugInfoFound = true;
-            break;
-          }
-        }
-        if (!NonDebugInfoFound)
-          // Make the block empty, losing the debug info (we could probably
-          // improve this in some cases.)
-          MBB->erase(MBB->begin(), MBB->end());
+      if (IsEmptyBlock(MBB)) {
+        // Make the block empty, losing the debug info (we could probably
+        // improve this in some cases.)
+        MBB->erase(MBB->begin(), MBB->end());
       }
       // If this block is just an unconditional branch to CurTBB, we can
       // usually completely eliminate the block.  The only case we cannot
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index d1b17dd..46c05dc 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -24,7 +24,7 @@ namespace llvm {
   class TargetInstrInfo;
   class TargetRegisterInfo;
 
-  class BranchFolder {
+  class LLVM_LIBRARY_VISIBILITY BranchFolder {
   public:
     explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
                           const MachineBlockFrequencyInfo &MBFI,
@@ -142,6 +142,6 @@ namespace llvm {
     bool HoistCommonCode(MachineFunction &MF);
     bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
   };
-} // namespace llvm
+}
 
 #endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index a992c5e..eb75529 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -30,6 +30,7 @@ add_llvm_library(LLVMCodeGen
   ImplicitNullChecks.cpp
   InlineSpiller.cpp
   InterferenceCache.cpp
+  InterleavedAccessPass.cpp
   IntrinsicLowering.cpp
   LLVMTargetMachine.cpp
   LatencyPriorityQueue.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 247c45b..70de4e7 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -189,7 +189,7 @@ class TypePromotionTransaction;
     bool splitBranchCondition(Function &F);
     bool simplifyOffsetableRelocate(Instruction &I);
   };
-} // namespace
+}
 
 char CodeGenPrepare::ID = 0;
 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
diff --git a/lib/CodeGen/CoreCLRGC.cpp b/lib/CodeGen/CoreCLRGC.cpp
index 0816d14..28c97ba 100644
--- a/lib/CodeGen/CoreCLRGC.cpp
+++ b/lib/CodeGen/CoreCLRGC.cpp
@@ -45,7 +45,7 @@ public:
     return (1 == PT->getAddressSpace());
   }
 };
-} // namespace
+}
 
 static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC");
 
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 1ca5300..10b8739 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -31,7 +31,7 @@ class RegisterClassInfo;
 class TargetInstrInfo;
 class TargetRegisterInfo;
 
-  class CriticalAntiDepBreaker : public AntiDepBreaker {
+class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
     MachineFunction& MF;
     MachineRegisterInfo &MRI;
     const TargetInstrInfo *TII;
@@ -103,6 +103,6 @@ class TargetRegisterInfo;
                                       const TargetRegisterClass *RC,
                                       SmallVectorImpl<unsigned> &Forbid);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 02cdb50..0a188c0 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -110,7 +110,7 @@ public:
   // Schedule - Actual scheduling work.
   void schedule() override;
 };
-} // namespace llvm
+}
 
 DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
                                            MachineLoopInfo &MLI, bool IsPostRA)
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index efaf47c..963d573 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -45,7 +45,7 @@ namespace {
   private:
     bool isDead(const MachineInstr *MI) const;
   };
-} // namespace
+}
 char DeadMachineInstructionElim::ID = 0;
 char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
 
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index f43b2f1..aea7c31 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -89,7 +89,7 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
   O << "}\n";
   return O;
 }
-} // namespace llvm
+}
 
 /// view - Visualize the annotated bipartite CFG with Graphviz.
 void EdgeBundles::view() const {
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index dd508b3..5b09cf1 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -110,7 +110,7 @@ struct DomainValue {
     Instrs.clear();
   }
 };
-} // namespace
+}
 
 namespace {
 /// Information about a live register.
@@ -201,7 +201,7 @@ private:
   bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
   void processUndefReads(MachineBasicBlock*);
 };
-} // namespace
+}
 
 char ExeDepsFix::ID = 0;
 
diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp
index 0512ff9..2acafaf 100644
--- a/lib/CodeGen/FaultMaps.cpp
+++ b/lib/CodeGen/FaultMaps.cpp
@@ -112,3 +112,39 @@ const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
     return "FaultingLoad";
   }
 }
+
+raw_ostream &llvm::
+operator<<(raw_ostream &OS,
+           const FaultMapParser::FunctionFaultInfoAccessor &FFI) {
+  OS << "Fault kind: "
+     << FaultMaps::faultTypeToString((FaultMaps::FaultKind)FFI.getFaultKind())
+     << ", faulting PC offset: " << FFI.getFaultingPCOffset()
+     << ", handling PC offset: " << FFI.getHandlerPCOffset();
+  return OS;
+}
+
+raw_ostream &llvm::
+operator<<(raw_ostream &OS, const FaultMapParser::FunctionInfoAccessor &FI) {
+  OS << "FunctionAddress: " << format_hex(FI.getFunctionAddr(), 8)
+     << ", NumFaultingPCs: " << FI.getNumFaultingPCs() << "\n";
+  for (unsigned i = 0, e = FI.getNumFaultingPCs(); i != e; ++i)
+    OS << FI.getFunctionFaultInfoAt(i) << "\n";
+  return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const FaultMapParser &FMP) {
+  OS << "Version: " << format_hex(FMP.getFaultMapVersion(), 2) << "\n";
+  OS << "NumFunctions: " << FMP.getNumFunctions() << "\n";
+
+  if (FMP.getNumFunctions() == 0)
+    return OS;
+
+  FaultMapParser::FunctionInfoAccessor FI;
+
+  for (unsigned i = 0, e = FMP.getNumFunctions(); i != e; ++i) {
+    FI = (i == 0) ? FMP.getFirstFunctionInfo() : FI.getNextFunctionInfo();
+    OS << FI;
+  }
+
+  return OS;
+}
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index cba7f5f..c8116a4 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -38,7 +38,7 @@ public:
   bool runOnFunction(Function &F) override;
   bool doFinalization(Module &M) override;
 };
-} // namespace
+}
 
 INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
                 "Create Garbage Collector Module Metadata", false, false)
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index fcef322..d8edd7e 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -76,7 +76,7 @@ public:
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
-} // namespace
+}
 
 // -----------------------------------------------------------------------------
 
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 963dfe7..ee0532b 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -197,8 +197,7 @@ namespace {
     bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
                       unsigned &Dups1, unsigned &Dups2) const;
     void ScanInstructions(BBInfo &BBI);
-    BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
-                         std::vector<IfcvtToken*> &Tokens);
+    void AnalyzeBlock(MachineBasicBlock *MBB, std::vector<IfcvtToken*> &Tokens);
     bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
                              bool isTriangle = false, bool RevBranch = false);
     void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
@@ -264,7 +263,7 @@ namespace {
   };
 
   char IfConverter::ID = 0;
-} // namespace
+}
 
 char &llvm::IfConverterID = IfConverter::ID;
 
@@ -764,155 +763,185 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
 /// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
 /// the specified block. Record its successors and whether it looks like an
 /// if-conversion candidate.
-IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
-                                             std::vector<IfcvtToken*> &Tokens) {
-  BBInfo &BBI = BBAnalysis[BB->getNumber()];
+void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
+                               std::vector<IfcvtToken*> &Tokens) {
+  struct BBState {
+    BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}
+    MachineBasicBlock *MBB;
+
+    /// This flag is true if MBB's successors have been analyzed.
+    bool SuccsAnalyzed;
+  };
 
-  if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
-    return BBI;
+  // Push MBB to the stack.
+  SmallVector<BBState, 16> BBStack(1, MBB);
 
-  BBI.BB = BB;
-  BBI.IsBeingAnalyzed = true;
+  while (!BBStack.empty()) {
+    BBState &State = BBStack.back();
+    MachineBasicBlock *BB = State.MBB;
+    BBInfo &BBI = BBAnalysis[BB->getNumber()];
 
-  ScanInstructions(BBI);
+    if (!State.SuccsAnalyzed) {
+      if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) {
+        BBStack.pop_back();
+        continue;
+      }
 
-  // Unanalyzable or ends with fallthrough or unconditional branch, or if is not
-  // considered for ifcvt anymore.
-  if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
-    BBI.IsBeingAnalyzed = false;
-    BBI.IsAnalyzed = true;
-    return BBI;
-  }
+      BBI.BB = BB;
+      BBI.IsBeingAnalyzed = true;
 
-  // Do not ifcvt if either path is a back edge to the entry block.
-  if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
-    BBI.IsBeingAnalyzed = false;
-    BBI.IsAnalyzed = true;
-    return BBI;
-  }
+      ScanInstructions(BBI);
 
-  // Do not ifcvt if true and false fallthrough blocks are the same.
-  if (!BBI.FalseBB) {
-    BBI.IsBeingAnalyzed = false;
-    BBI.IsAnalyzed = true;
-    return BBI;
-  }
+      // Unanalyzable or ends with fallthrough or unconditional branch, or if is
+      // not considered for ifcvt anymore.
+      if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+        BBI.IsBeingAnalyzed = false;
+        BBI.IsAnalyzed = true;
+        BBStack.pop_back();
+        continue;
+      }
 
-  BBInfo &TrueBBI  = AnalyzeBlock(BBI.TrueBB, Tokens);
-  BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+      // Do not ifcvt if either path is a back edge to the entry block.
+      if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+        BBI.IsBeingAnalyzed = false;
+        BBI.IsAnalyzed = true;
+        BBStack.pop_back();
+        continue;
+      }
 
-  if (TrueBBI.IsDone && FalseBBI.IsDone) {
-    BBI.IsBeingAnalyzed = false;
-    BBI.IsAnalyzed = true;
-    return BBI;
-  }
+      // Do not ifcvt if true and false fallthrough blocks are the same.
+      if (!BBI.FalseBB) {
+        BBI.IsBeingAnalyzed = false;
+        BBI.IsAnalyzed = true;
+        BBStack.pop_back();
+        continue;
+      }
 
-  SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
-  bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+      // Push the False and True blocks to the stack.
+      State.SuccsAnalyzed = true;
+      BBStack.push_back(BBI.FalseBB);
+      BBStack.push_back(BBI.TrueBB);
+      continue;
+    }
 
-  unsigned Dups = 0;
-  unsigned Dups2 = 0;
-  bool TNeedSub = !TrueBBI.Predicate.empty();
-  bool FNeedSub = !FalseBBI.Predicate.empty();
-  bool Enqueued = false;
+    BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+    BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
 
-  BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+    if (TrueBBI.IsDone && FalseBBI.IsDone) {
+      BBI.IsBeingAnalyzed = false;
+      BBI.IsAnalyzed = true;
+      BBStack.pop_back();
+      continue;
+    }
 
-  if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
-                                       TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
-                         *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
-                                        FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
-                         Prediction) &&
-      FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
-      FeasibilityAnalysis(FalseBBI, RevCond)) {
-    // Diamond:
-    //   EBB
-    //   / \_
-    //  |   |
-    // TBB FBB
-    //   \ /
-    //  TailBB
-    // Note TailBB can be empty.
-    Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
-                                    Dups2));
-    Enqueued = true;
-  }
+    SmallVector<MachineOperand, 4>
+        RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+    bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
 
-  if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction) &&
-      FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
-    // Triangle:
-    //   EBB
-    //   | \_
-    //   |  |
-    //   | TBB
-    //   |  /
-    //   FBB
-    Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
-    Enqueued = true;
-  }
+    unsigned Dups = 0;
+    unsigned Dups2 = 0;
+    bool TNeedSub = !TrueBBI.Predicate.empty();
+    bool FNeedSub = !FalseBBI.Predicate.empty();
+    bool Enqueued = false;
 
-  if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction) &&
-      FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
-    Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
-    Enqueued = true;
-  }
+    BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
 
-  if (ValidSimple(TrueBBI, Dups, Prediction) &&
-      MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         TrueBBI.ExtraCost2, Prediction) &&
-      FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
-    // Simple (split, no rejoin):
-    //   EBB
-    //   | \_
-    //   |  |
-    //   | TBB---> exit
-    //   |
-    //   FBB
-    Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
-    Enqueued = true;
-  }
+    if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+        MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+                                         TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+                           *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+                                        FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+                         Prediction) &&
+        FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+        FeasibilityAnalysis(FalseBBI, RevCond)) {
+      // Diamond:
+      //   EBB
+      //   / \_
+      //  |   |
+      // TBB FBB
+      //   \ /
+      //  TailBB
+      // Note TailBB can be empty.
+      Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
+                                      Dups2));
+      Enqueued = true;
+    }
 
-  if (CanRevCond) {
-    // Try the other path...
-    if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
-                      Prediction.getCompl()) &&
-        MeetIfcvtSizeLimit(*FalseBBI.BB,
-                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
-        FeasibilityAnalysis(FalseBBI, RevCond, true)) {
-      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+    if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+        MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+                           TrueBBI.ExtraCost2, Prediction) &&
+        FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+      // Triangle:
+      //   EBB
+      //   | \_
+      //   |  |
+      //   | TBB
+      //   |  /
+      //   FBB
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
       Enqueued = true;
     }
 
-    if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
-                      Prediction.getCompl()) &&
-        MeetIfcvtSizeLimit(*FalseBBI.BB,
-                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
-        FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
-      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+    if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+        MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+                           TrueBBI.ExtraCost2, Prediction) &&
+        FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
       Enqueued = true;
     }
 
-    if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
-        MeetIfcvtSizeLimit(*FalseBBI.BB,
-                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
-        FeasibilityAnalysis(FalseBBI, RevCond)) {
-      Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+    if (ValidSimple(TrueBBI, Dups, Prediction) &&
+        MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+                           TrueBBI.ExtraCost2, Prediction) &&
+        FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+      // Simple (split, no rejoin):
+      //   EBB
+      //   | \_
+      //   |  |
+      //   | TBB---> exit
+      //   |
+      //   FBB
+      Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
       Enqueued = true;
     }
-  }
 
-  BBI.IsEnqueued = Enqueued;
-  BBI.IsBeingAnalyzed = false;
-  BBI.IsAnalyzed = true;
-  return BBI;
+    if (CanRevCond) {
+      // Try the other path...
+      if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+                        Prediction.getCompl()) &&
+          MeetIfcvtSizeLimit(*FalseBBI.BB,
+                             FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                             FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+          FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+        Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+        Enqueued = true;
+      }
+
+      if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+                        Prediction.getCompl()) &&
+          MeetIfcvtSizeLimit(*FalseBBI.BB,
+                             FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                           FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+        FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+        Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+        Enqueued = true;
+      }
+
+      if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+          MeetIfcvtSizeLimit(*FalseBBI.BB,
+                             FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                             FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+          FeasibilityAnalysis(FalseBBI, RevCond)) {
+        Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+        Enqueued = true;
+      }
+    }
+
+    BBI.IsEnqueued = Enqueued;
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    BBStack.pop_back();
+  }
 }
 
 /// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
@@ -1355,15 +1384,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
   Redefs.addLiveIns(BBI1->BB);
 
   // Remove the duplicated instructions at the beginnings of both paths.
-  MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
-  MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
-  MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
-  MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
   // Skip dbg_value instructions
-  while (DI1 != DIE1 && DI1->isDebugValue())
-    ++DI1;
-  while (DI2 != DIE2 && DI2->isDebugValue())
-    ++DI2;
+  MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr();
+  MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr();
   BBI1->NonPredSize -= NumDups1;
   BBI2->NonPredSize -= NumDups1;
 
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index b1176ce..a02cd67 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -99,7 +99,7 @@ public:
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
-} // namespace
+}
 
 bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getSubtarget().getInstrInfo();
@@ -124,6 +124,13 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
     MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
   typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
 
+  MDNode *BranchMD =
+      MBB.getBasicBlock()
+          ? MBB.getBasicBlock()->getTerminator()->getMetadata("make.implicit")
+          : nullptr;
+  if (!BranchMD)
+    return false;
+
   MachineBranchPredicate MBP;
 
   if (TII->AnalyzeBranchPredicate(MBB, MBP, true))
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 48c95c9..9989f23 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -181,7 +181,7 @@ private:
   void spillAroundUses(unsigned Reg);
   void spillAll();
 };
-} // namespace
+}
 
 namespace llvm {
 
@@ -194,7 +194,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass,
   return new InlineSpiller(pass, mf, vrm);
 }
 
-} // namespace llvm
+}
 
 //===----------------------------------------------------------------------===//
 //                                Snippets
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 6519a80..18aa5c7 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -21,7 +21,7 @@ namespace llvm {
 
 class LiveIntervals;
 
-class InterferenceCache {
+class LLVM_LIBRARY_VISIBILITY InterferenceCache {
   const TargetRegisterInfo *TRI;
   LiveIntervalUnion *LIUArray;
   MachineFunction *MF;
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
new file mode 100644
index 0000000..53c8adc
--- /dev/null
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -0,0 +1,286 @@
+//=----------------------- InterleavedAccessPass.cpp -----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Interleaved Access pass, which identifies
+// interleaved memory accesses and transforms into target specific intrinsics.
+//
+// An interleaved load reads data from memory into several vectors, with
+// DE-interleaving the data on a factor. An interleaved store writes several
+// vectors to memory with RE-interleaving the data on a factor.
+//
+// As interleaved accesses are hard to be identified in CodeGen (mainly because
+// the VECTOR_SHUFFLE DAG node is quite different from the shufflevector IR),
+// we identify and transform them to intrinsics in this pass. So the intrinsics
+// can be easily matched into target specific instructions later in CodeGen.
+//
+// E.g. An interleaved load (Factor = 2):
+//        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
+//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+//
+// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
+// intrinsic in ARM backend.
+//
+// E.g. An interleaved store (Factor = 3):
+//        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+//                                    <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+//        store <12 x i32> %i.vec, <12 x i32>* %ptr
+//
+// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
+// intrinsic in ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "interleaved-access"
+
+static cl::opt<bool> LowerInterleavedAccesses(
+    "lower-interleaved-accesses",
+    cl::desc("Enable lowering interleaved accesses to intrinsics"),
+    cl::init(false), cl::Hidden);
+
+static unsigned MaxFactor; // The maximum supported interleave factor.
+
+namespace llvm {
+static void initializeInterleavedAccessPass(PassRegistry &);
+}
+
+namespace {
+
+class InterleavedAccess : public FunctionPass {
+
+public:
+  static char ID;
+  InterleavedAccess(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM), TLI(nullptr) {
+    initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override { return "Interleaved Access Pass"; }
+
+  bool runOnFunction(Function &F) override;
+
+private:
+  const TargetMachine *TM;
+  const TargetLowering *TLI;
+
+  /// \brief Transform an interleaved load into target specific intrinsics.
+  bool lowerInterleavedLoad(LoadInst *LI,
+                            SmallVector<Instruction *, 32> &DeadInsts);
+
+  /// \brief Transform an interleaved store into target specific intrinsics.
+  bool lowerInterleavedStore(StoreInst *SI,
+                             SmallVector<Instruction *, 32> &DeadInsts);
+};
+} // end anonymous namespace.
+
+char InterleavedAccess::ID = 0;
+INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access",
+    "Lower interleaved memory accesses to target specific intrinsics",
+    false, false)
+
+FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
+  return new InterleavedAccess(TM);
+}
+
+/// \brief Check if the mask is a DE-interleave mask of the given factor
+/// \p Factor like:
+///     <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
+static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
+                                       unsigned &Index) {
+  // Check all potential start indices from 0 to (Factor - 1).
+  for (Index = 0; Index < Factor; Index++) {
+    unsigned i = 0;
+
+    // Check that elements are in ascending order by Factor. Ignore undef
+    // elements.
+    for (; i < Mask.size(); i++)
+      if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
+        break;
+
+    if (i == Mask.size())
+      return true;
+  }
+
+  return false;
+}
+
+/// \brief Check if the mask is a DE-interleave mask for an interleaved load.
+///
+/// E.g. DE-interleave masks (Factor = 2) could be:
+///     <0, 2, 4, 6>    (mask of index 0 to extract even elements)
+///     <1, 3, 5, 7>    (mask of index 1 to extract odd elements)
+static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
+                               unsigned &Index) {
+  if (Mask.size() < 2)
+    return false;
+
+  // Check potential Factors.
+  for (Factor = 2; Factor <= MaxFactor; Factor++)
+    if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
+      return true;
+
+  return false;
+}
+
+/// \brief Check if the mask is RE-interleave mask for an interleaved store.
+///
+/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
+///
+/// E.g. The RE-interleave mask (Factor = 2) could be:
+///     <0, 4, 1, 5, 2, 6, 3, 7>
+static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
+  unsigned NumElts = Mask.size();
+  if (NumElts < 4)
+    return false;
+
+  // Check potential Factors.
+  for (Factor = 2; Factor <= MaxFactor; Factor++) {
+    if (NumElts % Factor)
+      continue;
+
+    unsigned NumSubElts = NumElts / Factor;
+    if (!isPowerOf2_32(NumSubElts))
+      continue;
+
+    // Check whether each element matchs the RE-interleaved rule. Ignore undef
+    // elements.
+    unsigned i = 0;
+    for (; i < NumElts; i++)
+      if (Mask[i] >= 0 &&
+          static_cast<unsigned>(Mask[i]) !=
+              (i % Factor) * NumSubElts + i / Factor)
+        break;
+
+    // Find a RE-interleaved mask of current factor.
+    if (i == NumElts)
+      return true;
+  }
+
+  return false;
+}
+
+bool InterleavedAccess::lowerInterleavedLoad(
+    LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
+  if (!LI->isSimple())
+    return false;
+
+  SmallVector<ShuffleVectorInst *, 4> Shuffles;
+
+  // Check if all users of this load are shufflevectors.
+  for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
+    ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
+    if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
+      return false;
+
+    Shuffles.push_back(SVI);
+  }
+
+  if (Shuffles.empty())
+    return false;
+
+  unsigned Factor, Index;
+
+  // Check if the first shufflevector is DE-interleave shuffle.
+  if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index))
+    return false;
+
+  // Holds the corresponding index for each DE-interleave shuffle.
+  SmallVector<unsigned, 4> Indices;
+  Indices.push_back(Index);
+
+  Type *VecTy = Shuffles[0]->getType();
+
+  // Check if other shufflevectors are also DE-interleaved of the same type
+  // and factor as the first shufflevector.
+  for (unsigned i = 1; i < Shuffles.size(); i++) {
+    if (Shuffles[i]->getType() != VecTy)
+      return false;
+
+    if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
+                                    Index))
+      return false;
+
+    Indices.push_back(Index);
+  }
+
+  DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
+
+  // Try to create target specific intrinsics to replace the load and shuffles.
+  if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
+    return false;
+
+  for (auto SVI : Shuffles)
+    DeadInsts.push_back(SVI);
+
+  DeadInsts.push_back(LI);
+  return true;
+}
+
+bool InterleavedAccess::lowerInterleavedStore(
+    StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
+  if (!SI->isSimple())
+    return false;
+
+  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+  if (!SVI || !SVI->hasOneUse())
+    return false;
+
+  // Check if the shufflevector is RE-interleave shuffle.
+  unsigned Factor;
+  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor))
+    return false;
+
+  DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
+
+  // Try to create target specific intrinsics to replace the store and shuffle.
+  if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
+    return false;
+
+  // Already have a new target specific interleaved store. Erase the old store.
+  DeadInsts.push_back(SI);
+  DeadInsts.push_back(SVI);
+  return true;
+}
+
+bool InterleavedAccess::runOnFunction(Function &F) {
+  if (!TM || !LowerInterleavedAccesses)
+    return false;
+
+  DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
+
+  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+  MaxFactor = TLI->getMaxSupportedInterleaveFactor();
+
+  // Holds dead instructions that will be erased later.
+  SmallVector<Instruction *, 32> DeadInsts;
+  bool Changed = false;
+
+  for (auto &I : inst_range(F)) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+      Changed |= lowerInterleavedLoad(LI, DeadInsts);
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+      Changed |= lowerInterleavedStore(SI, DeadInsts);
+  }
+
+  for (auto I : DeadInsts)
+    I->eraseFromParent();
+
+  return Changed;
+}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index ac2d1a1..694aa17 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -31,7 +31,7 @@ class LiveInterval;
 class LiveIntervals;
 class VirtRegMap;
 
-class LiveDebugVariables : public MachineFunctionPass {
+class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
   void *pImpl;
   DenseMap<const Function *, DISubprogram *> FunctionDIs;
 
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index eef7643..cbd98e3 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -14,6 +14,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -123,3 +125,42 @@ void LivePhysRegs::dump() const {
   dbgs() << "  " << *this;
 #endif
 }
+
+/// Add live-in registers of basic block \p MBB to \p LiveRegs.
+static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
+  for (unsigned Reg : make_range(MBB.livein_begin(), MBB.livein_end()))
+    LiveRegs.addReg(Reg);
+}
+
+/// Add pristine registers to the given \p LiveRegs. This function removes
+/// actually saved callee save registers when \p InPrologueEpilogue is false.
+static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
+                         const TargetRegisterInfo &TRI) {
+  const MachineFrameInfo &MFI = *MF.getFrameInfo();
+  if (!MFI.isCalleeSavedInfoValid())
+    return;
+
+  for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+    LiveRegs.addReg(*CSR);
+  for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+    LiveRegs.removeReg(Info.getReg());
+}
+
+void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB,
+                               bool AddPristines) {
+  if (AddPristines) {
+    const MachineFunction &MF = *MBB->getParent();
+    addPristines(*this, MF, *TRI);
+  }
+  for (const MachineBasicBlock *Succ : MBB->successors())
+    ::addLiveIns(*this, *Succ);
+}
+
+void LivePhysRegs::addLiveIns(const MachineBasicBlock *MBB,
+                              bool AddPristines) {
+  if (AddPristines) {
+    const MachineFunction &MF = *MBB->getParent();
+    addPristines(*this, MF, *TRI);
+  }
+  ::addLiveIns(*this, *MBB);
+}
diff --git a/lib/CodeGen/MIRParser/CMakeLists.txt b/lib/CodeGen/MIRParser/CMakeLists.txt
index 468f072..7e757f6 100644
--- a/lib/CodeGen/MIRParser/CMakeLists.txt
+++ b/lib/CodeGen/MIRParser/CMakeLists.txt
@@ -1,4 +1,6 @@
 add_llvm_library(LLVMMIRParser
+  MILexer.cpp
+  MIParser.cpp
   MIRParser.cpp
   )
 
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
new file mode 100644
index 0000000..e9b3916
--- /dev/null
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -0,0 +1,199 @@
+//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lexing of machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MILexer.h"
+#include "llvm/ADT/Twine.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+
+/// This class provides a way to iterate and get characters from the source
+/// string.
+class Cursor {
+  const char *Ptr;
+  const char *End;
+
+public:
+  Cursor(NoneType) : Ptr(nullptr), End(nullptr) {}
+
+  explicit Cursor(StringRef Str) {
+    Ptr = Str.data();
+    End = Ptr + Str.size();
+  }
+
+  bool isEOF() const { return Ptr == End; }
+
+  char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
+
+  void advance(unsigned I = 1) { Ptr += I; }
+
+  StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
+
+  StringRef upto(Cursor C) const {
+    assert(C.Ptr >= Ptr && C.Ptr <= End);
+    return StringRef(Ptr, C.Ptr - Ptr);
+  }
+
+  StringRef::iterator location() const { return Ptr; }
+
+  operator bool() const { return Ptr != nullptr; }
+};
+
+} // end anonymous namespace
+
+/// Skip the leading whitespace characters and return the updated cursor.
+static Cursor skipWhitespace(Cursor C) {
+  while (isspace(C.peek()))
+    C.advance();
+  return C;
+}
+
+static bool isIdentifierChar(char C) {
+  return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.';
+}
+
+static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
+  if (!isalpha(C.peek()) && C.peek() != '_')
+    return None;
+  auto Range = C;
+  while (isIdentifierChar(C.peek()))
+    C.advance();
+  auto Identifier = Range.upto(C);
+  Token = MIToken(Identifier == "_" ? MIToken::underscore : MIToken::Identifier,
+                  Identifier);
+  return C;
+}
+
+static Cursor maybeLexMachineBasicBlock(
+    Cursor C, MIToken &Token,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  if (!C.remaining().startswith("%bb."))
+    return None;
+  auto Range = C;
+  C.advance(4); // Skip '%bb.'
+  if (!isdigit(C.peek())) {
+    Token = MIToken(MIToken::Error, C.remaining());
+    ErrorCallback(C.location(), "expected a number after '%bb.'");
+    return C;
+  }
+  auto NumberRange = C;
+  while (isdigit(C.peek()))
+    C.advance();
+  StringRef Number = NumberRange.upto(C);
+  unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>'
+  if (C.peek() == '.') {
+    C.advance(); // Skip '.'
+    ++StringOffset;
+    while (isIdentifierChar(C.peek()))
+      C.advance();
+  }
+  Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number),
+                  StringOffset);
+  return C;
+}
+
+static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
+  if (C.peek() != '%')
+    return None;
+  auto Range = C;
+  C.advance(); // Skip '%'
+  while (isIdentifierChar(C.peek()))
+    C.advance();
+  Token = MIToken(MIToken::NamedRegister, Range.upto(C),
+                  /*StringOffset=*/1); // Drop the '%'
+  return C;
+}
+
+static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) {
+  if (C.peek() != '@')
+    return None;
+  auto Range = C;
+  C.advance(); // Skip the '@'
+  // TODO: add support for quoted names.
+  if (!isdigit(C.peek())) {
+    while (isIdentifierChar(C.peek()))
+      C.advance();
+    Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C),
+                    /*StringOffset=*/1); // Drop the '@'
+    return C;
+  }
+  auto NumberRange = C;
+  while (isdigit(C.peek()))
+    C.advance();
+  Token =
+      MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C)));
+  return C;
+}
+
+static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) {
+  if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
+    return None;
+  auto Range = C;
+  C.advance();
+  while (isdigit(C.peek()))
+    C.advance();
+  StringRef StrVal = Range.upto(C);
+  Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal));
+  return C;
+}
+
+static MIToken::TokenKind symbolToken(char C) {
+  switch (C) {
+  case ',':
+    return MIToken::comma;
+  case '=':
+    return MIToken::equal;
+  default:
+    return MIToken::Error;
+  }
+}
+
+static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
+  auto Kind = symbolToken(C.peek());
+  if (Kind == MIToken::Error)
+    return None;
+  auto Range = C;
+  C.advance();
+  Token = MIToken(Kind, Range.upto(C));
+  return C;
+}
+
+StringRef llvm::lexMIToken(
+    StringRef Source, MIToken &Token,
+    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+  auto C = skipWhitespace(Cursor(Source));
+  if (C.isEOF()) {
+    Token = MIToken(MIToken::Eof, C.remaining());
+    return C.remaining();
+  }
+
+  if (Cursor R = maybeLexIdentifier(C, Token))
+    return R.remaining();
+  if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
+    return R.remaining();
+  if (Cursor R = maybeLexRegister(C, Token))
+    return R.remaining();
+  if (Cursor R = maybeLexGlobalValue(C, Token))
+    return R.remaining();
+  if (Cursor R = maybeLexIntegerLiteral(C, Token))
+    return R.remaining();
+  if (Cursor R = maybeLexSymbol(C, Token))
+    return R.remaining();
+
+  Token = MIToken(MIToken::Error, C.remaining());
+  ErrorCallback(C.location(),
+                Twine("unexpected character '") + Twine(C.peek()) + "'");
+  return C.remaining();
+}
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
new file mode 100644
index 0000000..c28935f
--- /dev/null
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -0,0 +1,96 @@
+//===- MILexer.h - Lexer for machine instructions -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that lexes the machine instruction source
+// string.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include <functional>
+
+namespace llvm {
+
+class Twine;
+
+/// A token produced by the machine instruction lexer.
+struct MIToken {
+  enum TokenKind {
+    // Markers
+    Eof,
+    Error,
+
+    // Tokens with no info.
+    comma,
+    equal,
+    underscore,
+
+    // Identifier tokens
+    Identifier,
+    NamedRegister,
+    MachineBasicBlock,
+    NamedGlobalValue,
+    GlobalValue,
+
+    // Other tokens
+    IntegerLiteral
+  };
+
+private:
+  TokenKind Kind;
+  unsigned StringOffset;
+  StringRef Range;
+  APSInt IntVal;
+
+public:
+  MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0)
+      : Kind(Kind), StringOffset(StringOffset), Range(Range) {}
+
+  MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal,
+          unsigned StringOffset = 0)
+      : Kind(Kind), StringOffset(StringOffset), Range(Range), IntVal(IntVal) {}
+
+  TokenKind kind() const { return Kind; }
+
+  bool isError() const { return Kind == Error; }
+
+  bool isRegister() const {
+    return Kind == NamedRegister || Kind == underscore;
+  }
+
+  bool is(TokenKind K) const { return Kind == K; }
+
+  bool isNot(TokenKind K) const { return Kind != K; }
+
+  StringRef::iterator location() const { return Range.begin(); }
+
+  StringRef stringValue() const { return Range.drop_front(StringOffset); }
+
+  const APSInt &integerValue() const { return IntVal; }
+
+  bool hasIntegerValue() const {
+    return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
+           Kind == GlobalValue;
+  }
+};
+
+/// Consume a single machine instruction token in the given source and return
+/// the remaining source string.
+StringRef lexMIToken(
+    StringRef Source, MIToken &Token,
+    function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
new file mode 100644
index 0000000..b618e53
--- /dev/null
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -0,0 +1,423 @@
+//===- MIParser.cpp - Machine instructions parser implementation ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the parsing of machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIParser.h"
+#include "MILexer.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class MIParser {
+  SourceMgr &SM;
+  MachineFunction &MF;
+  SMDiagnostic &Error;
+  StringRef Source, CurrentSource;
+  MIToken Token;
+  /// Maps from basic block numbers to MBBs.
+  const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots;
+  /// Maps from indices to unnamed global values and metadata nodes.
+  const SlotMapping &IRSlots;
+  /// Maps from instruction names to op codes.
+  StringMap<unsigned> Names2InstrOpCodes;
+  /// Maps from register names to registers.
+  StringMap<unsigned> Names2Regs;
+  /// Maps from register mask names to register masks.
+  StringMap<const uint32_t *> Names2RegMasks;
+
+public:
+  MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
+           StringRef Source,
+           const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+           const SlotMapping &IRSlots);
+
+  void lex();
+
+  /// Report an error at the current location with the given message.
+  ///
+  /// This function always return true.
+  bool error(const Twine &Msg);
+
+  /// Report an error at the given location with the given message.
+  ///
+  /// This function always return true.
+  bool error(StringRef::iterator Loc, const Twine &Msg);
+
+  bool parse(MachineInstr *&MI);
+  bool parseMBB(MachineBasicBlock *&MBB);
+
+  bool parseRegister(unsigned &Reg);
+  bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false);
+  bool parseImmediateOperand(MachineOperand &Dest);
+  bool parseMBBReference(MachineBasicBlock *&MBB);
+  bool parseMBBOperand(MachineOperand &Dest);
+  bool parseGlobalAddressOperand(MachineOperand &Dest);
+  bool parseMachineOperand(MachineOperand &Dest);
+
+private:
+  /// Convert the integer literal in the current token into an unsigned integer.
+  ///
+  /// Return true if an error occurred.
+  bool getUnsigned(unsigned &Result);
+
+  void initNames2InstrOpCodes();
+
+  /// Try to convert an instruction name to an opcode. Return true if the
+  /// instruction name is invalid.
+  bool parseInstrName(StringRef InstrName, unsigned &OpCode);
+
+  bool parseInstruction(unsigned &OpCode);
+
+  void initNames2Regs();
+
+  /// Try to convert a register name to a register number. Return true if the
+  /// register name is invalid.
+  bool getRegisterByName(StringRef RegName, unsigned &Reg);
+
+  void initNames2RegMasks();
+
+  /// Check if the given identifier is a name of a register mask.
+  ///
+  /// Return null if the identifier isn't a register mask.
+  const uint32_t *getRegMask(StringRef Identifier);
+};
+
+} // end anonymous namespace
+
+MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
+                   StringRef Source,
+                   const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+                   const SlotMapping &IRSlots)
+    : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
+      Token(MIToken::Error, StringRef()), MBBSlots(MBBSlots), IRSlots(IRSlots) {
+}
+
+void MIParser::lex() {
+  CurrentSource = lexMIToken(
+      CurrentSource, Token,
+      [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
+}
+
+bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
+
+bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
+  // TODO: Get the proper location in the MIR file, not just a location inside
+  // the string.
+  assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
+  Error = SMDiagnostic(
+      SM, SMLoc(),
+      SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
+      Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
+  return true;
+}
+
+bool MIParser::parse(MachineInstr *&MI) {
+  lex();
+
+  // Parse any register operands before '='
+  // TODO: Allow parsing of multiple operands before '='
+  MachineOperand MO = MachineOperand::CreateImm(0);
+  SmallVector<MachineOperand, 8> Operands;
+  if (Token.isRegister()) {
+    if (parseRegisterOperand(MO, /*IsDef=*/true))
+      return true;
+    Operands.push_back(MO);
+    if (Token.isNot(MIToken::equal))
+      return error("expected '='");
+    lex();
+  }
+
+  unsigned OpCode;
+  if (Token.isError() || parseInstruction(OpCode))
+    return true;
+
+  // TODO: Parse the instruction flags and memory operands.
+
+  // Parse the remaining machine operands.
+  while (Token.isNot(MIToken::Eof)) {
+    if (parseMachineOperand(MO))
+      return true;
+    Operands.push_back(MO);
+    if (Token.is(MIToken::Eof))
+      break;
+    if (Token.isNot(MIToken::comma))
+      return error("expected ',' before the next machine operand");
+    lex();
+  }
+
+  const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
+
+  // Verify machine operands.
+  if (!MCID.isVariadic()) {
+    for (size_t I = 0, E = Operands.size(); I < E; ++I) {
+      if (I < MCID.getNumOperands())
+        continue;
+      // Mark this register as implicit to prevent an assertion when it's added
+      // to an instruction. This is a temporary workaround until the implicit
+      // register flag can be parsed.
+      if (Operands[I].isReg())
+        Operands[I].setImplicit();
+    }
+  }
+
+  // TODO: Determine the implicit behaviour when implicit register flags are
+  // parsed.
+  MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
+  for (const auto &Operand : Operands)
+    MI->addOperand(MF, Operand);
+  return false;
+}
+
+bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
+  lex();
+  if (Token.isNot(MIToken::MachineBasicBlock))
+    return error("expected a machine basic block reference");
+  if (parseMBBReference(MBB))
+    return true;
+  lex();
+  if (Token.isNot(MIToken::Eof))
+    return error(
+        "expected end of string after the machine basic block reference");
+  return false;
+}
+
+bool MIParser::parseInstruction(unsigned &OpCode) {
+  if (Token.isNot(MIToken::Identifier))
+    return error("expected a machine instruction");
+  StringRef InstrName = Token.stringValue();
+  if (parseInstrName(InstrName, OpCode))
+    return error(Twine("unknown machine instruction name '") + InstrName + "'");
+  lex();
+  return false;
+}
+
+bool MIParser::parseRegister(unsigned &Reg) {
+  switch (Token.kind()) {
+  case MIToken::underscore:
+    Reg = 0;
+    break;
+  case MIToken::NamedRegister: {
+    StringRef Name = Token.stringValue();
+    if (getRegisterByName(Name, Reg))
+      return error(Twine("unknown register name '") + Name + "'");
+    break;
+  }
+  // TODO: Parse other register kinds.
+  default:
+    llvm_unreachable("The current token should be a register");
+  }
+  return false;
+}
+
+bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
+  unsigned Reg;
+  // TODO: Parse register flags.
+  if (parseRegister(Reg))
+    return true;
+  lex();
+  // TODO: Parse subregister.
+  Dest = MachineOperand::CreateReg(Reg, IsDef);
+  return false;
+}
+
+bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::IntegerLiteral));
+  const APSInt &Int = Token.integerValue();
+  if (Int.getMinSignedBits() > 64)
+    // TODO: Replace this with an error when we can parse CIMM Machine Operands.
+    llvm_unreachable("Can't parse large integer literals yet!");
+  Dest = MachineOperand::CreateImm(Int.getExtValue());
+  lex();
+  return false;
+}
+
+bool MIParser::getUnsigned(unsigned &Result) {
+  assert(Token.hasIntegerValue() && "Expected a token with an integer value");
+  const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
+  uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
+  if (Val64 == Limit)
+    return error("expected 32-bit integer (too large)");
+  Result = Val64;
+  return false;
+}
+
+bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
+  assert(Token.is(MIToken::MachineBasicBlock));
+  unsigned Number;
+  if (getUnsigned(Number))
+    return true;
+  auto MBBInfo = MBBSlots.find(Number);
+  if (MBBInfo == MBBSlots.end())
+    return error(Twine("use of undefined machine basic block #") +
+                 Twine(Number));
+  MBB = MBBInfo->second;
+  if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName())
+    return error(Twine("the name of machine basic block #") + Twine(Number) +
+                 " isn't '" + Token.stringValue() + "'");
+  return false;
+}
+
+bool MIParser::parseMBBOperand(MachineOperand &Dest) {
+  MachineBasicBlock *MBB;
+  if (parseMBBReference(MBB))
+    return true;
+  Dest = MachineOperand::CreateMBB(MBB);
+  lex();
+  return false;
+}
+
+bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+  switch (Token.kind()) {
+  case MIToken::NamedGlobalValue: {
+    auto Name = Token.stringValue();
+    const Module *M = MF.getFunction()->getParent();
+    if (const auto *GV = M->getNamedValue(Name)) {
+      Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
+      break;
+    }
+    return error(Twine("use of undefined global value '@") + Name + "'");
+  }
+  case MIToken::GlobalValue: {
+    unsigned GVIdx;
+    if (getUnsigned(GVIdx))
+      return true;
+    if (GVIdx >= IRSlots.GlobalValues.size())
+      return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
+                   "'");
+    Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx],
+                                    /*Offset=*/0);
+    break;
+  }
+  default:
+    llvm_unreachable("The current token should be a global value");
+  }
+  // TODO: Parse offset and target flags.
+  lex();
+  return false;
+}
+
+bool MIParser::parseMachineOperand(MachineOperand &Dest) {
+  switch (Token.kind()) {
+  case MIToken::underscore:
+  case MIToken::NamedRegister:
+    return parseRegisterOperand(Dest);
+  case MIToken::IntegerLiteral:
+    return parseImmediateOperand(Dest);
+  case MIToken::MachineBasicBlock:
+    return parseMBBOperand(Dest);
+  case MIToken::GlobalValue:
+  case MIToken::NamedGlobalValue:
+    return parseGlobalAddressOperand(Dest);
+  case MIToken::Error:
+    return true;
+  case MIToken::Identifier:
+    if (const auto *RegMask = getRegMask(Token.stringValue())) {
+      Dest = MachineOperand::CreateRegMask(RegMask);
+      lex();
+      break;
+    }
+  // fallthrough
+  default:
+    // TODO: parse the other machine operands.
+    return error("expected a machine operand");
+  }
+  return false;
+}
+
+void MIParser::initNames2InstrOpCodes() {
+  if (!Names2InstrOpCodes.empty())
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
+    Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
+}
+
+bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) {
+  initNames2InstrOpCodes();
+  auto InstrInfo = Names2InstrOpCodes.find(InstrName);
+  if (InstrInfo == Names2InstrOpCodes.end())
+    return true;
+  OpCode = InstrInfo->getValue();
+  return false;
+}
+
+void MIParser::initNames2Regs() {
+  if (!Names2Regs.empty())
+    return;
+  // The '%noreg' register is the register 0.
+  Names2Regs.insert(std::make_pair("noreg", 0));
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
+    bool WasInserted =
+        Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
+            .second;
+    (void)WasInserted;
+    assert(WasInserted && "Expected registers to be unique case-insensitively");
+  }
+}
+
+bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
+  initNames2Regs();
+  auto RegInfo = Names2Regs.find(RegName);
+  if (RegInfo == Names2Regs.end())
+    return true;
+  Reg = RegInfo->getValue();
+  return false;
+}
+
+void MIParser::initNames2RegMasks() {
+  if (!Names2RegMasks.empty())
+    return;
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
+  ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
+  assert(RegMasks.size() == RegMaskNames.size());
+  for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
+    Names2RegMasks.insert(
+        std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
+}
+
+const uint32_t *MIParser::getRegMask(StringRef Identifier) {
+  initNames2RegMasks();
+  auto RegMaskInfo = Names2RegMasks.find(Identifier);
+  if (RegMaskInfo == Names2RegMasks.end())
+    return nullptr;
+  return RegMaskInfo->getValue();
+}
+
+bool llvm::parseMachineInstr(
+    MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, StringRef Src,
+    const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+    const SlotMapping &IRSlots, SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parse(MI);
+}
+
+bool llvm::parseMBBReference(
+    MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src,
+    const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+    const SlotMapping &IRSlots, SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parseMBB(MBB);
+}
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
new file mode 100644
index 0000000..4d6d4e7
--- /dev/null
+++ b/lib/CodeGen/MIRParser/MIParser.h
@@ -0,0 +1,41 @@
+//===- MIParser.h - Machine Instructions Parser ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that parses the machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineInstr;
+class MachineFunction;
+struct SlotMapping;
+class SMDiagnostic;
+class SourceMgr;
+
+bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF,
+                       StringRef Src,
+                       const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+                       const SlotMapping &IRSlots, SMDiagnostic &Error);
+
+bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
+                       MachineFunction &MF, StringRef Src,
+                       const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots,
+                       const SlotMapping &IRSlots, SMDiagnostic &Error);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 1fef3f6..3974583 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -13,11 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "MIParser.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/AsmParser/Parser.h"
+#include "llvm/AsmParser/SlotMapping.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -43,6 +47,7 @@ class MIRParserImpl {
   StringRef Filename;
   LLVMContext &Context;
   StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
+  SlotMapping IRSlots;
 
 public:
   MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
@@ -55,6 +60,12 @@ public:
   /// Always returns true.
   bool error(const Twine &Message);
 
+  /// Report a given error with the location translated from the location in an
+  /// embedded string literal to a location in the MIR file.
+  ///
+  /// Always returns true.
+  bool error(const SMDiagnostic &Error, SMRange SourceRange);
+
   /// Try to parse the optional LLVM module and the machine functions in the MIR
   /// file.
   ///
@@ -79,10 +90,19 @@ public:
   /// Initialize the machine basic block using it's YAML representation.
   ///
   /// Return true if an error occurred.
-  bool initializeMachineBasicBlock(MachineBasicBlock &MBB,
-                                   const yaml::MachineBasicBlock &YamlMBB);
+  bool initializeMachineBasicBlock(
+      MachineFunction &MF, MachineBasicBlock &MBB,
+      const yaml::MachineBasicBlock &YamlMBB,
+      const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+
+  bool initializeRegisterInfo(MachineRegisterInfo &RegInfo,
+                              const yaml::MachineFunction &YamlMF);
 
 private:
+  /// Return a MIR diagnostic converted from an MI string diagnostic.
+  SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
+                                    SMRange SourceRange);
+
   /// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
   SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
                                         SMRange SourceRange);
@@ -105,6 +125,12 @@ bool MIRParserImpl::error(const Twine &Message) {
   return true;
 }
 
+bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) {
+  assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error");
+  reportDiagnostic(diagFromMIStringDiag(Error, SourceRange));
+  return true;
+}
+
 void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
   DiagnosticSeverity Kind;
   switch (Diag.getKind()) {
@@ -128,6 +154,7 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
 std::unique_ptr<Module> MIRParserImpl::parse() {
   yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(),
                  /*Ctxt=*/nullptr, handleYAMLDiag, this);
+  In.setContext(&In);
 
   if (!In.setCurrentDocument()) {
     if (In.error())
@@ -144,7 +171,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
           dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {
     SMDiagnostic Error;
     M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
-                      Context);
+                      Context, &IRSlots);
     if (!M) {
       reportDiagnostic(diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange()));
       return M;
@@ -206,7 +233,11 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
     MF.setAlignment(YamlMF.Alignment);
   MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
   MF.setHasInlineAsm(YamlMF.HasInlineAsm);
+  if (initializeRegisterInfo(MF.getRegInfo(), YamlMF))
+    return true;
+
   const auto &F = *MF.getFunction();
+  DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
   for (const auto &YamlMBB : YamlMF.BasicBlocks) {
     const BasicBlock *BB = nullptr;
     if (!YamlMBB.Name.empty()) {
@@ -218,21 +249,79 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
     }
     auto *MBB = MF.CreateMachineBasicBlock(BB);
     MF.insert(MF.end(), MBB);
-    if (initializeMachineBasicBlock(*MBB, YamlMBB))
+    bool WasInserted = MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
+    if (!WasInserted)
+      return error(Twine("redefinition of machine basic block with id #") +
+                   Twine(YamlMBB.ID));
+  }
+
+  // Initialize the machine basic blocks after creating them all so that the
+  // machine instructions parser can resolve the MBB references.
+  unsigned I = 0;
+  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
+    if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
+                                    MBBSlots))
       return true;
   }
   return false;
 }
 
 bool MIRParserImpl::initializeMachineBasicBlock(
-    MachineBasicBlock &MBB, const yaml::MachineBasicBlock &YamlMBB) {
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    const yaml::MachineBasicBlock &YamlMBB,
+    const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
   MBB.setAlignment(YamlMBB.Alignment);
   if (YamlMBB.AddressTaken)
     MBB.setHasAddressTaken();
   MBB.setIsLandingPad(YamlMBB.IsLandingPad);
+  SMDiagnostic Error;
+  // Parse the successors.
+  for (const auto &MBBSource : YamlMBB.Successors) {
+    MachineBasicBlock *SuccMBB = nullptr;
+    if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, MBBSlots, IRSlots,
+                          Error))
+      return error(Error, MBBSource.SourceRange);
+    // TODO: Report an error when adding the same successor more than once.
+    MBB.addSuccessor(SuccMBB);
+  }
+  // Parse the instructions.
+  for (const auto &MISource : YamlMBB.Instructions) {
+    MachineInstr *MI = nullptr;
+    if (parseMachineInstr(MI, SM, MF, MISource.Value, MBBSlots, IRSlots, Error))
+      return error(Error, MISource.SourceRange);
+    MBB.insert(MBB.end(), MI);
+  }
   return false;
 }
 
+bool MIRParserImpl::initializeRegisterInfo(
+    MachineRegisterInfo &RegInfo, const yaml::MachineFunction &YamlMF) {
+  assert(RegInfo.isSSA());
+  if (!YamlMF.IsSSA)
+    RegInfo.leaveSSA();
+  assert(RegInfo.tracksLiveness());
+  if (!YamlMF.TracksRegLiveness)
+    RegInfo.invalidateLiveness();
+  RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness);
+  return false;
+}
+
+SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
+                                                 SMRange SourceRange) {
+  assert(SourceRange.isValid() && "Invalid source range");
+  SMLoc Loc = SourceRange.Start;
+  bool HasQuote = Loc.getPointer() < SourceRange.End.getPointer() &&
+                  *Loc.getPointer() == '\'';
+  // Translate the location of the error from the location in the MI string to
+  // the corresponding location in the MIR file.
+  Loc = Loc.getFromPointer(Loc.getPointer() + Error.getColumnNo() +
+                           (HasQuote ? 1 : 0));
+
+  // TODO: Translate any source ranges as well.
+  return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), None,
+                       Error.getFixIts());
+}
+
 SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
                                                      SMRange SourceRange) {
   assert(SourceRange.isValid());
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index bbf163a..76cbe29 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -15,12 +15,15 @@
 #include "MIRPrinter.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/YAMLTraits.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -30,13 +33,36 @@ namespace {
 /// format.
 class MIRPrinter {
   raw_ostream &OS;
+  DenseMap<const uint32_t *, unsigned> RegisterMaskIds;
 
 public:
   MIRPrinter(raw_ostream &OS) : OS(OS) {}
 
   void print(const MachineFunction &MF);
 
-  void convert(yaml::MachineBasicBlock &YamlMBB, const MachineBasicBlock &MBB);
+  void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo);
+  void convert(const Module &M, yaml::MachineBasicBlock &YamlMBB,
+               const MachineBasicBlock &MBB);
+
+private:
+  void initRegisterMaskIds(const MachineFunction &MF);
+};
+
+/// This class prints out the machine instructions using the MIR serialization
+/// format.
+class MIPrinter {
+  const Module &M;
+  raw_ostream &OS;
+  const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
+
+public:
+  MIPrinter(const Module &M, raw_ostream &OS,
+            const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds)
+      : M(M), OS(OS), RegisterMaskIds(RegisterMaskIds) {}
+
+  void print(const MachineInstr &MI);
+  void printMBBReference(const MachineBasicBlock &MBB);
+  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
 };
 
 } // end anonymous namespace
@@ -59,22 +85,44 @@ template <> struct BlockScalarTraits<Module> {
 } // end namespace llvm
 
 void MIRPrinter::print(const MachineFunction &MF) {
+  initRegisterMaskIds(MF);
+
   yaml::MachineFunction YamlMF;
   YamlMF.Name = MF.getName();
   YamlMF.Alignment = MF.getAlignment();
   YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
   YamlMF.HasInlineAsm = MF.hasInlineAsm();
+  convert(YamlMF, MF.getRegInfo());
+
+  int I = 0;
+  const auto &M = *MF.getFunction()->getParent();
   for (const auto &MBB : MF) {
+    // TODO: Allow printing of non sequentially numbered MBBs.
+    // This is currently needed as the basic block references get their index
+    // from MBB.getNumber(), thus it should be sequential so that the parser can
+    // map back to the correct MBBs when parsing the output.
+    assert(MBB.getNumber() == I++ &&
+           "Can't print MBBs that aren't sequentially numbered");
+    (void)I;
     yaml::MachineBasicBlock YamlMBB;
-    convert(YamlMBB, MBB);
+    convert(M, YamlMBB, MBB);
     YamlMF.BasicBlocks.push_back(YamlMBB);
   }
   yaml::Output Out(OS);
   Out << YamlMF;
 }
 
-void MIRPrinter::convert(yaml::MachineBasicBlock &YamlMBB,
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+                         const MachineRegisterInfo &RegInfo) {
+  MF.IsSSA = RegInfo.isSSA();
+  MF.TracksRegLiveness = RegInfo.tracksLiveness();
+  MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled();
+}
+
+void MIRPrinter::convert(const Module &M, yaml::MachineBasicBlock &YamlMBB,
                          const MachineBasicBlock &MBB) {
+  assert(MBB.getNumber() >= 0 && "Invalid MBB number");
+  YamlMBB.ID = (unsigned)MBB.getNumber();
   // TODO: Serialize unnamed BB references.
   if (const auto *BB = MBB.getBasicBlock())
     YamlMBB.Name = BB->hasName() ? BB->getName() : "<unnamed bb>";
@@ -83,6 +131,115 @@ void MIRPrinter::convert(yaml::MachineBasicBlock &YamlMBB,
   YamlMBB.Alignment = MBB.getAlignment();
   YamlMBB.AddressTaken = MBB.hasAddressTaken();
   YamlMBB.IsLandingPad = MBB.isLandingPad();
+  for (const auto *SuccMBB : MBB.successors()) {
+    std::string Str;
+    raw_string_ostream StrOS(Str);
+    MIPrinter(M, StrOS, RegisterMaskIds).printMBBReference(*SuccMBB);
+    YamlMBB.Successors.push_back(StrOS.str());
+  }
+
+  // Print the machine instructions.
+  YamlMBB.Instructions.reserve(MBB.size());
+  std::string Str;
+  for (const auto &MI : MBB) {
+    raw_string_ostream StrOS(Str);
+    MIPrinter(M, StrOS, RegisterMaskIds).print(MI);
+    YamlMBB.Instructions.push_back(StrOS.str());
+    Str.clear();
+  }
+}
+
+void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  unsigned I = 0;
+  for (const uint32_t *Mask : TRI->getRegMasks())
+    RegisterMaskIds.insert(std::make_pair(Mask, I++));
+}
+
+void MIPrinter::print(const MachineInstr &MI) {
+  const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
+  const auto *TRI = SubTarget.getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  const auto *TII = SubTarget.getInstrInfo();
+  assert(TII && "Expected target instruction info");
+
+  unsigned I = 0, E = MI.getNumOperands();
+  for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
+         !MI.getOperand(I).isImplicit();
+       ++I) {
+    if (I)
+      OS << ", ";
+    print(MI.getOperand(I), TRI);
+  }
+
+  if (I)
+    OS << " = ";
+  OS << TII->getName(MI.getOpcode());
+  // TODO: Print the instruction flags, machine mem operands.
+  if (I < E)
+    OS << ' ';
+
+  bool NeedComma = false;
+  for (; I < E; ++I) {
+    if (NeedComma)
+      OS << ", ";
+    print(MI.getOperand(I), TRI);
+    NeedComma = true;
+  }
+}
+
+static void printReg(unsigned Reg, raw_ostream &OS,
+                     const TargetRegisterInfo *TRI) {
+  // TODO: Print Stack Slots.
+  // TODO: Print virtual registers.
+  if (!Reg)
+    OS << '_';
+  else if (Reg < TRI->getNumRegs())
+    OS << '%' << StringRef(TRI->getName(Reg)).lower();
+  else
+    llvm_unreachable("Can't print this kind of register yet");
+}
+
+void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
+  OS << "%bb." << MBB.getNumber();
+  if (const auto *BB = MBB.getBasicBlock()) {
+    if (BB->hasName())
+      OS << '.' << BB->getName();
+  }
+}
+
+void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
+  switch (Op.getType()) {
+  case MachineOperand::MO_Register:
+    // TODO: Print register flags.
+    printReg(Op.getReg(), OS, TRI);
+    // TODO: Print sub register.
+    break;
+  case MachineOperand::MO_Immediate:
+    OS << Op.getImm();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    printMBBReference(*Op.getMBB());
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    // FIXME: Make this faster - print as operand will create a slot tracker to
+    // print unnamed values for the whole module every time it's called, which
+    // is inefficient.
+    Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, &M);
+    // TODO: Print offset and target flags.
+    break;
+  case MachineOperand::MO_RegisterMask: {
+    auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
+    if (RegMaskInfo != RegisterMaskIds.end())
+      OS << StringRef(TRI->getRegMaskNames()[RegMaskInfo->second]).lower();
+    else
+      llvm_unreachable("Can't print this machine register mask yet.");
+    break;
+  }
+  default:
+    // TODO: Print the other machine operands.
+    llvm_unreachable("Can't print this machine operand at the moment");
+  }
 }
 
 void llvm::printMIR(raw_ostream &OS, const Module &M) {
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index d5fdf8e..5d3f7eb 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/Debug.h"
@@ -171,9 +172,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
   return I;
 }
 
-MachineBasicBlock::const_iterator
-MachineBasicBlock::getFirstTerminator() const {
-  const_iterator B = begin(), E = end(), I = E;
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+  instr_iterator B = instr_begin(), E = instr_end(), I = E;
   while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
     ; /*noop */
   while (I != E && !I->isTerminator())
@@ -181,11 +181,10 @@ MachineBasicBlock::getFirstTerminator() const {
   return I;
 }
 
-MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
-  instr_iterator B = instr_begin(), E = instr_end(), I = E;
-  while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
-    ; /*noop */
-  while (I != E && !I->isTerminator())
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() {
+  // Skip over begin-of-block dbg_value instructions.
+  iterator I = begin(), E = end();
+  while (I != E && I->isDebugValue())
     ++I;
   return I;
 }
@@ -204,21 +203,6 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
   return end();
 }
 
-MachineBasicBlock::const_iterator
-MachineBasicBlock::getLastNonDebugInstr() const {
-  // Skip over end-of-block dbg_value instructions.
-  const_instr_iterator B = instr_begin(), I = instr_end();
-  while (I != B) {
-    --I;
-    // Return instruction that starts a bundle.
-    if (I->isDebugValue() || I->isInsideBundle())
-      continue;
-    return I;
-  }
-  // The block is all debug values.
-  return end();
-}
-
 const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
   // A block with a landing pad successor only has one other successor.
   if (succ_size() > 2)
@@ -261,6 +245,20 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
        << " is null\n";
     return;
   }
+  const Function *F = MF->getFunction();
+  const Module *M = F ? F->getParent() : nullptr;
+  ModuleSlotTracker MST(M);
+  print(OS, MST, Indexes);
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
+                              SlotIndexes *Indexes) const {
+  const MachineFunction *MF = getParent();
+  if (!MF) {
+    OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+       << " is null\n";
+    return;
+  }
 
   if (Indexes)
     OS << Indexes->getMBBStartIdx(this) << '\t';
@@ -270,7 +268,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
   const char *Comma = "";
   if (const BasicBlock *LBB = getBasicBlock()) {
     OS << Comma << "derived from LLVM BB ";
-    LBB->printAsOperand(OS, /*PrintType=*/false);
+    LBB->printAsOperand(OS, /*PrintType=*/false, MST);
     Comma = ", ";
   }
   if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
@@ -307,7 +305,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
     OS << '\t';
     if (I->isInsideBundle())
       OS << "  * ";
-    I->print(OS);
+    I->print(OS, MST);
   }
 
   // Print the successors of this block according to the CFG.
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 141990b..2969bad 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -179,7 +179,7 @@ public:
   /// in-loop predecessors of this chain.
   unsigned LoopPredecessors;
 };
-} // namespace
+}
 
 namespace {
 class MachineBlockPlacement : public MachineFunctionPass {
@@ -267,7 +267,7 @@ public:
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
-} // namespace
+}
 
 char MachineBlockPlacement::ID = 0;
 char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
@@ -1185,7 +1185,7 @@ public:
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
-} // namespace
+}
 
 char MachineBlockPlacementStats::ID = 0;
 char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 5019e8e..f33d0e6 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -67,10 +67,11 @@ private:
   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                       MachineTraceMetrics::Trace BlockTrace);
   bool
-  preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+  improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
                            MachineTraceMetrics::Trace BlockTrace,
                            SmallVectorImpl<MachineInstr *> &InsInstrs,
-                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
+                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+                           bool NewCodeHasLessInsts);
   bool preservesResourceLen(MachineBasicBlock *MBB,
                             MachineTraceMetrics::Trace BlockTrace,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -78,7 +79,7 @@ private:
   void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
                      SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
 };
-} // namespace
+}
 
 char MachineCombiner::ID = 0;
 char &llvm::MachineCombinerID = MachineCombiner::ID;
@@ -208,19 +209,24 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
   return NewRootLatency;
 }
 
-/// True when the new instruction sequence does not
-/// lengthen the critical path. The DAGCombine code sequence ends in MI
-/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
-/// necessary condition for the new sequence to replace the old sequence is that
-/// it cannot lengthen the critical path. This is decided by the formula
+/// True when the new instruction sequence does not lengthen the critical path
+/// and the new sequence has less instructions or the new sequence improves the
+/// critical path.
+/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
+/// The new code sequence ends in MI NewRoot. A necessary condition for the new
+/// sequence to replace the old sequence is that it cannot lengthen the critical
+/// path. This is decided by the formula:
 /// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
-/// The slack is the number of cycles Root can be delayed before the critical
-/// patch becomes longer.
-bool MachineCombiner::preservesCriticalPathLen(
+/// If the new sequence has an equal length critical path but does not reduce
+/// the number of instructions (NewCodeHasLessInsts is false), then it is not
+/// considered an improvement. The slack is the number of cycles Root can be
+/// delayed before the critical patch becomes longer.
+bool MachineCombiner::improvesCriticalPathLen(
     MachineBasicBlock *MBB, MachineInstr *Root,
     MachineTraceMetrics::Trace BlockTrace,
     SmallVectorImpl<MachineInstr *> &InsInstrs,
-    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+    bool NewCodeHasLessInsts) {
 
   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
   // NewRoot is the last instruction in the \p InsInstrs vector.
@@ -245,9 +251,13 @@ bool MachineCombiner::preservesCriticalPathLen(
         dbgs() << " RootDepth + RootLatency + RootSlack "
                << RootDepth + RootLatency + RootSlack << "\n";);
 
-  /// True when the new sequence does not lengthen the critical path.
-  return ((NewRootDepth + NewRootLatency) <=
-          (RootDepth + RootLatency + RootSlack));
+  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+  unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+  
+  if (NewCodeHasLessInsts)
+    return NewCycleCount <= OldCycleCount;
+  else
+    return NewCycleCount < OldCycleCount;
 }
 
 /// helper routine to convert instructions into SC
@@ -359,18 +369,21 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
         Traces->verifyAnalysis();
         TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
                                         InstrIdxForVirtReg);
+        unsigned NewInstCount = InsInstrs.size();
+        unsigned OldInstCount = DelInstrs.size();
         // Found pattern, but did not generate alternative sequence.
         // This can happen e.g. when an immediate could not be materialized
         // in a single instruction.
-        if (!InsInstrs.size())
+        if (!NewInstCount)
           continue;
         // Substitute when we optimize for codesize and the new sequence has
         // fewer instructions OR
         // the new sequence neither lengthens the critical path nor increases
         // resource pressure.
-        if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
-            (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
-                                      InstrIdxForVirtReg) &&
+        if (doSubstitute(NewInstCount, OldInstCount) ||
+            (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+                                      InstrIdxForVirtReg,
+                                      NewInstCount < OldInstCount) &&
              preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
           for (auto *InstrPtr : InsInstrs)
             MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index ec171b0..a686341 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -55,7 +55,7 @@ namespace {
                                  DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
     bool CopyPropagateBlock(MachineBasicBlock &MBB);
   };
-} // namespace
+}
 char MachineCopyPropagation::ID = 0;
 char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
 
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 67b9d77..800d1b5 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -29,6 +29,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/Debug.h"
@@ -361,9 +362,11 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
     OS << '\n';
   }
 
+  ModuleSlotTracker MST(getFunction()->getParent());
+  MST.incorporateFunction(*getFunction());
   for (const auto &BB : *this) {
     OS << '\n';
-    BB.print(OS, Indexes);
+    BB.print(OS, MST, Indexes);
   }
 
   OS << "\n# End machine code for function " << getName() << ".\n\n";
@@ -404,7 +407,7 @@ namespace llvm {
       return OutStr;
     }
   };
-} // namespace llvm
+}
 
 void MachineFunction::viewCFG() const
 {
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 57b7230..790f5ac 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -49,7 +49,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
 };
 
 char MachineFunctionPrinterPass::ID = 0;
-} // namespace
+}
 
 char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
 INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer",
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 19ba5cf..fdc4226 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -28,6 +28,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -296,10 +297,14 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
   llvm_unreachable("Invalid machine operand type");
 }
 
-/// print - Print the specified machine operand.
-///
 void MachineOperand::print(raw_ostream &OS,
                            const TargetRegisterInfo *TRI) const {
+  ModuleSlotTracker DummyMST(nullptr);
+  print(OS, DummyMST, TRI);
+}
+
+void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
+                           const TargetRegisterInfo *TRI) const {
   switch (getType()) {
   case MachineOperand::MO_Register:
     OS << PrintReg(getReg(), TRI, getSubReg());
@@ -387,7 +392,7 @@ void MachineOperand::print(raw_ostream &OS,
     break;
   case MachineOperand::MO_GlobalAddress:
     OS << "<ga:";
-    getGlobal()->printAsOperand(OS, /*PrintType=*/false);
+    getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
     if (getOffset()) OS << "+" << getOffset();
     OS << '>';
     break;
@@ -398,7 +403,7 @@ void MachineOperand::print(raw_ostream &OS,
     break;
   case MachineOperand::MO_BlockAddress:
     OS << '<';
-    getBlockAddress()->printAsOperand(OS, /*PrintType=*/false);
+    getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST);
     if (getOffset()) OS << "+" << getOffset();
     OS << '>';
     break;
@@ -410,7 +415,7 @@ void MachineOperand::print(raw_ostream &OS,
     break;
   case MachineOperand::MO_Metadata:
     OS << '<';
-    getMetadata()->printAsOperand(OS);
+    getMetadata()->printAsOperand(OS, MST);
     OS << '>';
     break;
   case MachineOperand::MO_MCSymbol:
@@ -505,63 +510,66 @@ uint64_t MachineMemOperand::getAlignment() const {
   return MinAlign(getBaseAlignment(), getOffset());
 }
 
-raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
-  assert((MMO.isLoad() || MMO.isStore()) &&
+void MachineMemOperand::print(raw_ostream &OS) const {
+  ModuleSlotTracker DummyMST(nullptr);
+  print(OS, DummyMST);
+}
+void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
+  assert((isLoad() || isStore()) &&
          "SV has to be a load, store or both.");
 
-  if (MMO.isVolatile())
+  if (isVolatile())
     OS << "Volatile ";
 
-  if (MMO.isLoad())
+  if (isLoad())
     OS << "LD";
-  if (MMO.isStore())
+  if (isStore())
     OS << "ST";
-  OS << MMO.getSize();
+  OS << getSize();
 
   // Print the address information.
   OS << "[";
-  if (const Value *V = MMO.getValue())
-    V->printAsOperand(OS, /*PrintType=*/false);
-  else if (const PseudoSourceValue *PSV = MMO.getPseudoValue())
+  if (const Value *V = getValue())
+    V->printAsOperand(OS, /*PrintType=*/false, MST);
+  else if (const PseudoSourceValue *PSV = getPseudoValue())
     PSV->printCustom(OS);
   else
     OS << "<unknown>";
 
-  unsigned AS = MMO.getAddrSpace();
+  unsigned AS = getAddrSpace();
   if (AS != 0)
     OS << "(addrspace=" << AS << ')';
 
   // If the alignment of the memory reference itself differs from the alignment
   // of the base pointer, print the base alignment explicitly, next to the base
   // pointer.
-  if (MMO.getBaseAlignment() != MMO.getAlignment())
-    OS << "(align=" << MMO.getBaseAlignment() << ")";
+  if (getBaseAlignment() != getAlignment())
+    OS << "(align=" << getBaseAlignment() << ")";
 
-  if (MMO.getOffset() != 0)
-    OS << "+" << MMO.getOffset();
+  if (getOffset() != 0)
+    OS << "+" << getOffset();
   OS << "]";
 
   // Print the alignment of the reference.
-  if (MMO.getBaseAlignment() != MMO.getAlignment() ||
-      MMO.getBaseAlignment() != MMO.getSize())
-    OS << "(align=" << MMO.getAlignment() << ")";
+  if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize())
+    OS << "(align=" << getAlignment() << ")";
 
   // Print TBAA info.
-  if (const MDNode *TBAAInfo = MMO.getAAInfo().TBAA) {
+  if (const MDNode *TBAAInfo = getAAInfo().TBAA) {
     OS << "(tbaa=";
     if (TBAAInfo->getNumOperands() > 0)
-      TBAAInfo->getOperand(0)->printAsOperand(OS);
+      TBAAInfo->getOperand(0)->printAsOperand(OS, MST);
     else
       OS << "<unknown>";
     OS << ")";
   }
 
   // Print AA scope info.
-  if (const MDNode *ScopeInfo = MMO.getAAInfo().Scope) {
+  if (const MDNode *ScopeInfo = getAAInfo().Scope) {
     OS << "(alias.scope=";
     if (ScopeInfo->getNumOperands() > 0)
       for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
-        ScopeInfo->getOperand(i)->printAsOperand(OS);
+        ScopeInfo->getOperand(i)->printAsOperand(OS, MST);
         if (i != ie-1)
           OS << ",";
       }
@@ -571,11 +579,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
   }
 
   // Print AA noalias scope info.
-  if (const MDNode *NoAliasInfo = MMO.getAAInfo().NoAlias) {
+  if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) {
     OS << "(noalias=";
     if (NoAliasInfo->getNumOperands() > 0)
       for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
-        NoAliasInfo->getOperand(i)->printAsOperand(OS);
+        NoAliasInfo->getOperand(i)->printAsOperand(OS, MST);
         if (i != ie-1)
           OS << ",";
       }
@@ -585,10 +593,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
   }
 
   // Print nontemporal info.
-  if (MMO.isNonTemporal())
+  if (isNonTemporal())
     OS << "(nontemporal)";
 
-  return OS;
+  if (isInvariant())
+    OS << "(invariant)";
 }
 
 //===----------------------------------------------------------------------===//
@@ -1523,6 +1532,17 @@ void MachineInstr::dump() const {
 }
 
 void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
+  const Module *M = nullptr;
+  if (const MachineBasicBlock *MBB = getParent())
+    if (const MachineFunction *MF = MBB->getParent())
+      M = MF->getFunction()->getParent();
+
+  ModuleSlotTracker MST(M);
+  print(OS, MST, SkipOpers);
+}
+
+void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
+                         bool SkipOpers) const {
   // We can be a bit tidier if we know the MachineFunction.
   const MachineFunction *MF = nullptr;
   const TargetRegisterInfo *TRI = nullptr;
@@ -1547,7 +1567,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
          !getOperand(StartOp).isImplicit();
        ++StartOp) {
     if (StartOp != 0) OS << ", ";
-    getOperand(StartOp).print(OS, TRI);
+    getOperand(StartOp).print(OS, MST, TRI);
     unsigned Reg = getOperand(StartOp).getReg();
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       VirtRegs.push_back(Reg);
@@ -1574,7 +1594,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
   if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
     // Print asm string.
     OS << " ";
-    getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI);
+    getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI);
 
     // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
     unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1642,7 +1662,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
       if (DIV && !DIV->getName().empty())
         OS << "!\"" << DIV->getName() << '\"';
       else
-        MO.print(OS, TRI);
+        MO.print(OS, MST, TRI);
     } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
       OS << TRI->getSubRegIndexName(MO.getImm());
     } else if (i == AsmDescOp && MO.isImm()) {
@@ -1676,7 +1696,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
       // Compute the index of the next operand descriptor.
       AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
     } else
-      MO.print(OS, TRI);
+      MO.print(OS, MST, TRI);
   }
 
   // Briefly indicate whether any call clobbers were omitted.
@@ -1701,7 +1721,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
     OS << " mem:";
     for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
          i != e; ++i) {
-      OS << **i;
+      (*i)->print(OS, MST);
       if (std::next(i) != e)
         OS << " ";
     }
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index a303426..42d0603 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -54,9 +54,8 @@ public:
 class MMIAddrLabelMap {
   MCContext &Context;
   struct AddrLabelSymEntry {
-    /// Symbols - The symbols for the label.  This is a pointer union that is
-    /// either one symbol (the common case) or a list of symbols.
-    PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
+    /// Symbols - The symbols for the label.
+    TinyPtrVector<MCSymbol *> Symbols;
 
     Function *Fn;   // The containing function of the BasicBlock.
     unsigned Index; // The index in BBCallbacks for the BasicBlock.
@@ -80,16 +79,9 @@ public:
   ~MMIAddrLabelMap() {
     assert(DeletedAddrLabelsNeedingEmission.empty() &&
            "Some labels for deleted blocks never got emitted");
-
-    // Deallocate any of the 'list of symbols' case.
-    for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
-         I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
-      if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
-        delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
   }
 
-  MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
-  std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+  ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
 
   void takeDeletedSymbolsForFunction(Function *F,
                                      std::vector<MCSymbol*> &Result);
@@ -97,51 +89,29 @@ public:
   void UpdateForDeletedBlock(BasicBlock *BB);
   void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
 };
-} // namespace llvm
+}
 
-MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
   assert(BB->hasAddressTaken() &&
          "Shouldn't get label for block without address taken");
   AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
 
   // If we already had an entry for this block, just return it.
-  if (!Entry.Symbols.isNull()) {
+  if (!Entry.Symbols.empty()) {
     assert(BB->getParent() == Entry.Fn && "Parent changed");
-    if (Entry.Symbols.is<MCSymbol*>())
-      return Entry.Symbols.get<MCSymbol*>();
-    return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
+    return Entry.Symbols;
   }
 
   // Otherwise, this is a new entry, create a new symbol for it and add an
   // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
   BBCallbacks.emplace_back(BB);
   BBCallbacks.back().setMap(this);
-  Entry.Index = BBCallbacks.size()-1;
+  Entry.Index = BBCallbacks.size() - 1;
   Entry.Fn = BB->getParent();
-  MCSymbol *Result = Context.createTempSymbol();
-  Entry.Symbols = Result;
-  return Result;
+  Entry.Symbols.push_back(Context.createTempSymbol());
+  return Entry.Symbols;
 }
 
-std::vector<MCSymbol*>
-MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
-  assert(BB->hasAddressTaken() &&
-         "Shouldn't get label for block without address taken");
-  AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
-  std::vector<MCSymbol*> Result;
-
-  // If we already had an entry for this block, just return it.
-  if (Entry.Symbols.isNull())
-    Result.push_back(getAddrLabelSymbol(BB));
-  else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>())
-    Result.push_back(Sym);
-  else
-    Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>();
-  return Result;
-}
-
-
 /// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
 /// them.
 void MMIAddrLabelMap::
@@ -162,16 +132,15 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
   // If the block got deleted, there is no need for the symbol.  If the symbol
   // was already emitted, we can just forget about it, otherwise we need to
   // queue it up for later emission when the function is output.
-  AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
+  AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]);
   AddrLabelSymbols.erase(BB);
-  assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+  assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?");
   BBCallbacks[Entry.Index] = nullptr;  // Clear the callback.
 
   assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
          "Block/parent mismatch");
 
-  // Handle both the single and the multiple symbols cases.
-  if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
+  for (MCSymbol *Sym : Entry.Symbols) {
     if (Sym->isDefined())
       return;
 
@@ -180,64 +149,29 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
     // for the containing Function.  Since the block is being deleted, its
     // parent may already be removed, we have to get the function from 'Entry'.
     DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
-  } else {
-    std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>();
-
-    for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
-      MCSymbol *Sym = (*Syms)[i];
-      if (Sym->isDefined()) continue;  // Ignore already emitted labels.
-
-      // If the block is not yet defined, we need to emit it at the end of the
-      // function.  Add the symbol to the DeletedAddrLabelsNeedingEmission list
-      // for the containing Function.  Since the block is being deleted, its
-      // parent may already be removed, we have to get the function from
-      // 'Entry'.
-      DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
-    }
-
-    // The entry is deleted, free the memory associated with the symbol list.
-    delete Syms;
   }
 }
 
 void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
   // Get the entry for the RAUW'd block and remove it from our map.
-  AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+  AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]);
   AddrLabelSymbols.erase(Old);
-  assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+  assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?");
 
   AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
 
   // If New is not address taken, just move our symbol over to it.
-  if (NewEntry.Symbols.isNull()) {
+  if (NewEntry.Symbols.empty()) {
     BBCallbacks[OldEntry.Index].setPtr(New);    // Update the callback.
-    NewEntry = OldEntry;     // Set New's entry.
+    NewEntry = std::move(OldEntry);             // Set New's entry.
     return;
   }
 
   BBCallbacks[OldEntry.Index] = nullptr;    // Update the callback.
 
-  // Otherwise, we need to add the old symbol to the new block's set.  If it is
-  // just a single entry, upgrade it to a symbol list.
-  if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
-    std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
-    SymList->push_back(PrevSym);
-    NewEntry.Symbols = SymList;
-  }
-
-  std::vector<MCSymbol*> *SymList =
-    NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
-
-  // If the old entry was a single symbol, add it.
-  if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
-    SymList->push_back(Sym);
-    return;
-  }
-
-  // Otherwise, concatenate the list.
-  std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
-  SymList->insert(SymList->end(), Syms->begin(), Syms->end());
-  delete Syms;
+  // Otherwise, we need to add the old symbols to the new block's set.
+  NewEntry.Symbols.insert(NewEntry.Symbols.end(), OldEntry.Symbols.begin(),
+                          OldEntry.Symbols.end());
 }
 
 
@@ -273,8 +207,8 @@ bool MachineModuleInfo::doInitialization(Module &M) {
 
   ObjFileMMI = nullptr;
   CurCallSite = 0;
-  CallsEHReturn = 0;
-  CallsUnwindInit = 0;
+  CallsEHReturn = false;
+  CallsUnwindInit = false;
   DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
   // Always emit some info, by default "no personality" info.
   Personalities.push_back(nullptr);
@@ -313,29 +247,18 @@ void MachineModuleInfo::EndFunction() {
   TypeInfos.clear();
   FilterIds.clear();
   FilterEnds.clear();
-  CallsEHReturn = 0;
-  CallsUnwindInit = 0;
+  CallsEHReturn = false;
+  CallsUnwindInit = false;
   VariableDbgInfos.clear();
 }
 
 //===- Address of Block Management ----------------------------------------===//
 
-
-/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
-/// block when its address is taken.  This cannot be its normal LBB label
-/// because the block may be accessed outside its containing function.
-MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
-  // Lazily create AddrLabelSymbols.
-  if (!AddrLabelSymbols)
-    AddrLabelSymbols = new MMIAddrLabelMap(Context);
-  return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
-}
-
 /// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
 /// basic block when its address is taken.  If other blocks were RAUW'd to
 /// this one, we may have to emit them as well, return the whole set.
-std::vector<MCSymbol*> MachineModuleInfo::
-getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ArrayRef<MCSymbol *>
+MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
   // Lazily create AddrLabelSymbols.
   if (!AddrLabelSymbols)
     AddrLabelSymbols = new MMIAddrLabelMap(Context);
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index fd1bf31..71a6eba 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -340,7 +340,7 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
 /// for the specified BB and if so, return it.  If not, construct SSA form by
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index dd7654b..a48e54c 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1262,7 +1262,7 @@ public:
 protected:
   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
 };
-} // namespace
+} // anonymous
 
 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                   ScheduleDAGMI *DAG) {
@@ -1355,7 +1355,7 @@ public:
 
   void apply(ScheduleDAGMI *DAG) override;
 };
-} // namespace
+} // anonymous
 
 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
 /// fused operations.
@@ -1407,7 +1407,7 @@ public:
 protected:
   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
 };
-} // namespace
+} // anonymous
 
 /// constrainLocalCopy handles two possibilities:
 /// 1) Local src:
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 7704d14..f9adba0 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -306,7 +306,7 @@ public:
   MinInstrCountEnsemble(MachineTraceMetrics *mtm)
     : MachineTraceMetrics::Ensemble(mtm) {}
 };
-} // namespace
+}
 
 // Select the preferred predecessor for MBB.
 const MachineBasicBlock*
@@ -414,7 +414,7 @@ struct LoopBounds {
              const MachineLoopInfo *loops)
     : Blocks(blocks), Loops(loops), Downward(false) {}
 };
-} // namespace
+}
 
 // Specialize po_iterator_storage in order to prune the post-order traversal so
 // it is limited to the current loop and doesn't traverse the loop back edges.
@@ -447,7 +447,7 @@ public:
     return LB.Visited.insert(To).second;
   }
 };
-} // namespace llvm
+}
 
 /// Compute the trace through MBB.
 void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
@@ -619,7 +619,7 @@ struct DataDep {
     assert((++DefI).atEnd() && "Register has multiple defs");
   }
 };
-} // namespace
+}
 
 // Get the input data dependencies that must be ready before UseMI can issue.
 // Return true if UseMI has any physreg operands.
@@ -681,7 +681,7 @@ struct LiveRegUnit {
 
   LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
 };
-} // namespace
+}
 
 // Identify physreg dependencies for UseMI, and update the live regunit
 // tracking set when scanning instructions downwards.
@@ -829,8 +829,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
 
       // Filter and process dependencies, computing the earliest issue cycle.
       unsigned Cycle = 0;
-      for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
-        const DataDep &Dep = Deps[i];
+      for (const DataDep &Dep : Deps) {
         const TraceBlockInfo&DepTBI =
           BlockInfo[Dep.DefMI->getParent()->getNumber()];
         // Ignore dependencies from outside the current trace.
@@ -1088,9 +1087,9 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
                                       MTM.SchedModel, MTM.TII, MTM.TRI);
 
       // Update the required height of any virtual registers read by MI.
-      for (unsigned i = 0, e = Deps.size(); i != e; ++i)
-        if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
-          addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
+      for (const DataDep &Dep : Deps)
+        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
 
       InstrCycles &MICycles = Cycles[MI];
       MICycles.Height = Cycle;
@@ -1106,8 +1105,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
     // Update virtual live-in heights. They were added by addLiveIns() with a 0
     // height because the final height isn't known until now.
     DEBUG(dbgs() << "BB#" << MBB->getNumber() <<  " Live-ins:");
-    for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
-      LiveInReg &LIR = TBI.LiveIns[i];
+    for (LiveInReg &LIR : TBI.LiveIns) {
       const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
       LIR.Height = Heights.lookup(DefMI);
       DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 72a6769..ca35ec5 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -258,7 +258,7 @@ namespace {
     }
   };
 
-} // namespace
+}
 
 char MachineVerifierPass::ID = 0;
 INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
@@ -1710,7 +1710,7 @@ namespace {
     bool EntryIsSetup;
     bool ExitIsSetup;
   };
-} // namespace
+}
 
 /// Make sure on every path through the CFG, a FrameSetup <n> is always followed
 /// by a FrameDestroy <n>, stack adjustments are identical on all
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 9780d75..a1042e7 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -55,7 +55,7 @@ namespace {
     bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
     bool OptimizeBB(MachineBasicBlock &MBB);
   };
-} // namespace
+}
 
 char OptimizePHIs::ID = 0;
 char &llvm::OptimizePHIsID = OptimizePHIs::ID;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 471c78a..d343301 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -104,7 +104,7 @@ namespace {
                      MachineInstrExpressionTrait> LoweredPHIMap;
     LoweredPHIMap LoweredPHIs;
   };
-} // namespace
+}
 
 STATISTIC(NumLowered, "Number of phis lowered");
 STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 71c0a64..ebe05e3 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -293,7 +293,7 @@ namespace {
     /// register of the last source.
     unsigned getReg() const { return Reg; }
   };
-} // namespace
+}
 
 char PeepholeOptimizer::ID = 0;
 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 6760b5f..6f76116 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -184,7 +184,7 @@ namespace {
     void dumpSchedule() const;
     void emitNoop(unsigned CurCycle);
   };
-} // namespace
+}
 
 char &llvm::PostRASchedulerID = PostRAScheduler::ID;
 
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 4a46638..fd3d4d7 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -194,7 +194,7 @@ namespace {
     bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
   };
   char RAFast::ID = 0;
-} // namespace
+}
 
 /// getStackSpaceFor - This allocates space for the specified virtual register
 /// to be held on the stack.
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index e2061fe..7afea2a 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1449,6 +1449,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
                    << format("%04X", S.LaneMask) << ")\n");
       LIS->shrinkToUses(S, LI.reg);
     }
+    LI.removeEmptySubRanges();
   }
   if (ShrinkMainRange) {
     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
@@ -2697,7 +2698,7 @@ struct MBBPriorityInfo {
   MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
     : MBB(mbb), Depth(depth), IsSplit(issplit) {}
 };
-} // namespace
+}
 
 /// C-style comparator that sorts first based on the loop depth of the basic
 /// block (the unsigned), and then on the MBB number.
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 4ba7441..04067a1 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -111,6 +111,6 @@ namespace llvm {
     /// Return the register class of the coalesced register.
     const TargetRegisterClass *getNewRC() const { return NewRC; }
   };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index ae4b935..390b6d2 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -574,13 +574,13 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
   int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
   int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
 
-  AliasAnalysis::AliasResult AAResult =
+  AliasResult AAResult =
       AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
                                UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
                 MemoryLocation(MMOb->getValue(), Overlapb,
                                UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
 
-  return (AAResult != AliasAnalysis::NoAlias);
+  return (AAResult != NoAlias);
 }
 
 /// This recursive function iterates over chain deps of SUb looking for
@@ -1508,7 +1508,7 @@ public:
     return getCurr()->Preds.end();
   }
 };
-} // namespace
+} // anonymous
 
 static bool hasDataSucc(const SUnit *SU) {
   for (SUnit::const_succ_iterator
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index cdf27ae..b2e4617 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -72,7 +72,7 @@ namespace llvm {
       return G->addCustomGraphFeatures(GW);
     }
   };
-} // namespace llvm
+}
 
 std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
                                                        const ScheduleDAG *G) {
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5fea52c..6056d93 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -459,7 +459,7 @@ namespace {
       return TLI.getSetCCResultType(*DAG.getContext(), VT);
     }
   };
-} // namespace
+}
 
 
 namespace {
@@ -475,7 +475,7 @@ public:
     DC.removeFromWorklist(N);
   }
 };
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 //  TargetLowering::DAGCombinerInfo implementation
@@ -1192,8 +1192,8 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
       continue;
 
     if (N->use_empty()) {
-      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-        Nodes.insert(N->getOperand(i).getNode());
+      for (const SDValue &ChildN : N->op_values())
+        Nodes.insert(ChildN.getNode());
 
       removeFromWorklist(N);
       DAG.DeleteNode(N);
@@ -1266,9 +1266,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
     // worklist as well. Because the worklist uniques things already, this
     // won't repeatedly process the same operand.
     CombinedNodes.insert(N);
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      if (!CombinedNodes.count(N->getOperand(i).getNode()))
-        AddToWorklist(N->getOperand(i).getNode());
+    for (const SDValue &ChildN : N->op_values())
+      if (!CombinedNodes.count(ChildN.getNode()))
+        AddToWorklist(ChildN.getNode());
 
     SDValue RV = combine(N);
 
@@ -1523,8 +1523,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
     SDNode *TF = TFs[i];
 
     // Check each of the operands.
-    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
-      SDValue Op = TF->getOperand(i);
+    for (const SDValue &Op : TF->op_values()) {
 
       switch (Op.getOpcode()) {
       case ISD::EntryToken:
@@ -2179,7 +2178,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   }
 
   // fold (sdiv X, pow2) -> simple ops after legalize
+  // FIXME: We check for the exact bit here because the generic lowering gives
+  // better results in that case. The target-specific lowering should learn how
+  // to handle exact sdivs efficiently.
   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
       (N1C->getAPIntValue().isPowerOf2() ||
        (-N1C->getAPIntValue()).isPowerOf2())) {
     // If dividing by powers of two is cheap, then don't perform the following
@@ -4275,7 +4278,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   if (isNullConstant(N0))
     return N0;
   // fold (shl x, c >= size(x)) -> undef
-  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
     return DAG.getUNDEF(VT);
   // fold (shl x, 0) -> x
   if (N1C && N1C->isNullValue())
@@ -4362,6 +4365,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     }
   }
 
+  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
+  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
+  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
+      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
+    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t C1 = N0C1->getZExtValue();
+      uint64_t C2 = N1C->getZExtValue();
+      SDLoc DL(N);
+      if (C1 <= C2)
+        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
+                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+    }
+  }
+
   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
   //                               (and (srl x, (sub c1, c2), MASK)
   // Only fold this if the inner shift has no other uses -- if it does, folding
@@ -5560,12 +5579,12 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
                        SDLoc(N));
 }
 
-// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
-// dag node into a ConstantSDNode or a build_vector of constants.
-// This function is called by the DAGCombiner when visiting sext/zext/aext
-// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
-// Vector extends are not folded if operations are legal; this is to
-// avoid introducing illegal build_vector dag nodes.
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or 
+/// a build_vector of constants.
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+/// Vector extends are not folded if operations are legal; this is to
+/// avoid introducing illegal build_vector dag nodes.
 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                          SelectionDAG &DAG, bool LegalTypes,
                                          bool LegalOperations) {
@@ -5595,7 +5614,6 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   // We can fold this node into a build_vector.
   unsigned VTBits = SVT.getSizeInBits();
   unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
-  unsigned ShAmt = VTBits - EVTBits;
   SmallVector<SDValue, 8> Elts;
   unsigned NumElts = VT.getVectorNumElements();
   SDLoc DL(N);
@@ -5608,14 +5626,13 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
     }
 
     SDLoc DL(Op);
-    ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
-    const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
+    // Get the constant value and if needed trunc it to the size of the type.
+    // Nodes like build_vector might have constants wider than the scalar type.
+    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
-      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
-                                     DL, SVT));
+      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
     else
-      Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
-                                     DL, SVT));
+      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
   }
 
   return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
@@ -7307,8 +7324,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
                                      DstEltVT, BV->getOperand(0)));
 
     SmallVector<SDValue, 8> Ops;
-    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
-      SDValue Op = BV->getOperand(i);
+    for (SDValue Op : BV->op_values()) {
       // If the vector element type is not legal, the BUILD_VECTOR operands
       // are promoted and implicitly truncated.  Make that explicit here.
       if (Op.getValueType() != SrcEltVT)
@@ -7383,13 +7399,13 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
                             NumOutputsPerInput*BV->getNumOperands());
   SmallVector<SDValue, 8> Ops;
 
-  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
-    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+  for (const SDValue &Op : BV->op_values()) {
+    if (Op.getOpcode() == ISD::UNDEF) {
       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
       continue;
     }
 
-    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+    APInt OpVal = cast<ConstantSDNode>(Op)->
                   getAPIntValue().zextOrTrunc(SrcBitSize);
 
     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
@@ -9954,7 +9970,7 @@ struct LoadedSlice {
     return true;
   }
 };
-} // namespace
+}
 
 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
 /// \p UsedBits looks like 0..0 1..1 0..0.
@@ -10218,8 +10234,8 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
     return Result; // Fail.
   else {
     bool isOk = false;
-    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
-      if (Chain->getOperand(i).getNode() == LD) {
+    for (const SDValue &ChainOp : Chain->op_values())
+      if (ChainOp.getNode() == LD) {
         isOk = true;
         break;
       }
@@ -13884,12 +13900,12 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
         Op0->getSrcValueOffset() - MinOffset;
     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
         Op1->getSrcValueOffset() - MinOffset;
-    AliasAnalysis::AliasResult AAResult =
+    AliasResult AAResult =
         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
-    if (AAResult == AliasAnalysis::NoAlias)
+    if (AAResult == NoAlias)
       return false;
   }
 
@@ -13915,8 +13931,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   // aliases list.  If not, then continue up the chain looking for the next
   // candidate.
   while (!Chains.empty()) {
-    SDValue Chain = Chains.back();
-    Chains.pop_back();
+    SDValue Chain = Chains.pop_back_val();
 
     // For TokenFactor nodes, look at each operand and only continue up the
     // chain until we find two aliases.  If we've seen two aliases, assume we'll
@@ -14023,7 +14038,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
          UIE = M->use_end(); UI != UIE; ++UI)
       if (UI.getUse().getValueType() == MVT::Other &&
           Visited.insert(*UI).second) {
-        if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
+        if (isa<MemSDNode>(*UI)) {
           // We've not visited this use, and we care about it (it could have an
           // ordering dependency with the original node).
           Aliases.clear();
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 0351c33..5452b17 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -59,6 +59,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -701,6 +702,15 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
   return lowerCallTo(CLI);
 }
 
+FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
+    const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
+    const char *Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
+  SmallString<32> MangledName;
+  Mangler::getNameWithPrefix(MangledName, Target, DL);
+  MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+  return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs);
+}
+
 bool FastISel::selectPatchpoint(const CallInst *I) {
   // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
   //                                                 i32 <numBytes>,
@@ -856,6 +866,15 @@ static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
 
 bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
                            unsigned NumArgs) {
+  MCContext &Ctx = MF->getContext();
+  SmallString<32> MangledName;
+  Mangler::getNameWithPrefix(MangledName, SymName, DL);
+  MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+  return lowerCallTo(CI, Sym, NumArgs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
+                           unsigned NumArgs) {
   ImmutableCallSite CS(CI);
 
   PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -880,7 +899,7 @@ bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
   }
 
   CallLoweringInfo CLI;
-  CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs);
+  CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
 
   return lowerCallTo(CLI);
 }
@@ -1331,7 +1350,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
 
     // Don't handle Intrinsic::trap if a trap funciton is specified.
     if (F && F->getIntrinsicID() == Intrinsic::trap &&
-        !TM.Options.getTrapFunctionName().empty())
+        Call->hasFnAttr("trap-func-name"))
       return false;
   }
 
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 7abc0c4..42595cb 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -422,6 +422,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
     MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
   } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
     MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
+  } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) {
+    MIB.addSym(SymNode->getMCSymbol());
   } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
     MIB.addBlockAddress(BA->getBlockAddress(),
                         BA->getOffset(),
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 2a61914..3b24d93 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -26,7 +26,7 @@ class MachineInstrBuilder;
 class MCInstrDesc;
 class SDDbgValue;
 
-class InstrEmitter {
+class LLVM_LIBRARY_VISIBILITY InstrEmitter {
   MachineFunction *MF;
   MachineRegisterInfo *MRI;
   const TargetInstrInfo *TII;
@@ -140,6 +140,6 @@ private:
                        DenseMap<SDValue, unsigned> &VRBaseMap);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 37f95e5..c0d7871 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -198,7 +198,7 @@ public:
     ReplacedNode(Old);
   }
 };
-} // namespace
+}
 
 /// Return a vector shuffle operation which
 /// performs the same shuffe in terms of order or result bytes, but on a type
@@ -1165,17 +1165,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
     return;
 
+#ifndef NDEBUG
   for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
     assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
              TargetLowering::TypeLegal &&
            "Unexpected illegal type!");
 
-  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+  for (const SDValue &Op : Node->op_values())
     assert((TLI.getTypeAction(*DAG.getContext(),
-                              Node->getOperand(i).getValueType()) ==
-              TargetLowering::TypeLegal ||
-            Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
-           "Unexpected illegal type!");
+                              Op.getValueType()) == TargetLowering::TypeLegal ||
+                              Op.getOpcode() == ISD::TargetConstant) &&
+                              "Unexpected illegal type!");
+#endif
 
   // Figure out the correct action; the way to query this varies by opcode
   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
@@ -2047,10 +2048,11 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
                                             bool isSigned) {
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
-    EVT ArgVT = Node->getOperand(i).getValueType();
+  for (const SDValue &Op : Node->op_values()) {
+    EVT ArgVT = Op.getValueType();
     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.Node = Op;
+    Entry.Ty = ArgTy;
     Entry.isSExt = isSigned;
     Entry.isZExt = !isSigned;
     Args.push_back(Entry);
@@ -2256,10 +2258,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
 
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
-    EVT ArgVT = Node->getOperand(i).getValueType();
+  for (const SDValue &Op : Node->op_values()) {
+    EVT ArgVT = Op.getValueType();
     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.Node = Op;
+    Entry.Ty = ArgTy;
     Entry.isSExt = isSigned;
     Entry.isZExt = !isSigned;
     Args.push_back(Entry);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 96e2ff8..f41202c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2435,10 +2435,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
 
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    EVT ArgVT = N->getOperand(i).getValueType();
+  for (const SDValue &Op : N->op_values()) {
+    EVT ArgVT = Op.getValueType();
     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = N->getOperand(i);
+    Entry.Node = Op;
     Entry.Ty = ArgTy;
     Entry.isSExt = true;
     Entry.isZExt = false;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index c3e3b7c..9c29769 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -676,7 +676,7 @@ namespace {
       NodesToAnalyze.insert(N);
     }
   };
-} // namespace
+}
 
 
 /// ReplaceValueWith - The specified value was legalized to the specified other
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 50ad239..ee844a8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -191,8 +191,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
 
   // Legalize the operands
   SmallVector<SDValue, 8> Ops;
-  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
-    Ops.push_back(LegalizeOp(Node->getOperand(i)));
+  for (const SDValue &Op : Node->op_values())
+    Ops.push_back(LegalizeOp(Op));
 
   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
 
@@ -1010,7 +1010,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
 }
 
-} // namespace
+}
 
 bool SelectionDAG::LegalizeVectors() {
   return VectorLegalizer(*this).Run();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 445e882..905492c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1760,8 +1760,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
   // a new CONCAT_VECTORS node with elements that are half-wide.
   SmallVector<SDValue, 32> Elts;
   EVT EltVT = N->getValueType(0).getVectorElementType();
-  for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
-    SDValue Op = N->getOperand(op);
+  for (const SDValue &Op : N->op_values()) {
     for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
          i != e; ++i) {
       Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 9493532..c27f8de 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -119,6 +119,6 @@ public:
   bool isInvalidated() const { return Invalid; }
 };
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 61a3fd7..00cbae3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -227,8 +227,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
     else if (VT == MVT::Other)
       TryUnfold = true;
   }
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    const SDValue &Op = N->getOperand(i);
+  for (const SDValue &Op : N->op_values()) {
     MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
     if (VT == MVT::Glue)
       return nullptr;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index fd0fa31..e9bd520 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -415,8 +415,8 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
     // most nesting in order to ensure that we find the corresponding match.
     if (N->getOpcode() == ISD::TokenFactor) {
-      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+      for (const SDValue &Op : N->op_values())
+        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
           return true;
       return false;
     }
@@ -433,9 +433,9 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
       }
     }
     // Otherwise, find the chain and continue climbing.
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      if (N->getOperand(i).getValueType() == MVT::Other) {
-        N = N->getOperand(i).getNode();
+    for (const SDValue &Op : N->op_values())
+      if (Op.getValueType() == MVT::Other) {
+        N = Op.getNode();
         goto found_chain_operand;
       }
     return false;
@@ -464,10 +464,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
     if (N->getOpcode() == ISD::TokenFactor) {
       SDNode *Best = nullptr;
       unsigned BestMaxNest = MaxNest;
-      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+      for (const SDValue &Op : N->op_values()) {
         unsigned MyNestLevel = NestLevel;
         unsigned MyMaxNest = MaxNest;
-        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+        if (SDNode *New = FindCallSeqStart(Op.getNode(),
                                            MyNestLevel, MyMaxNest, TII))
           if (!Best || (MyMaxNest > BestMaxNest)) {
             Best = New;
@@ -493,9 +493,9 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
       }
     }
     // Otherwise, find the chain and continue climbing.
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      if (N->getOperand(i).getValueType() == MVT::Other) {
-        N = N->getOperand(i).getNode();
+    for (const SDValue &Op : N->op_values())
+      if (Op.getValueType() == MVT::Other) {
+        N = Op.getNode();
         goto found_chain_operand;
       }
     return nullptr;
@@ -848,17 +848,26 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
       }
     }
 
-  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    if (I->isAssignedRegDep()) {
-      if (!LiveRegDefs[I->getReg()])
+  for (auto &Succ : SU->Succs) {
+    if (Succ.isAssignedRegDep()) {
+      auto Reg = Succ.getReg();
+      if (!LiveRegDefs[Reg])
         ++NumLiveRegs;
       // This becomes the nearest def. Note that an earlier def may still be
       // pending if this is a two-address node.
-      LiveRegDefs[I->getReg()] = SU;
-      if (LiveRegGens[I->getReg()] == nullptr ||
-          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
-        LiveRegGens[I->getReg()] = I->getSUnit();
+      LiveRegDefs[Reg] = SU;
+
+      // Update LiveRegGen only if was empty before this unscheduling.
+      // This is to avoid incorrect updating LiveRegGen set in previous run.
+      if (!LiveRegGens[Reg]) {
+        // Find the successor with the lowest height.
+        LiveRegGens[Reg] = Succ.getSUnit();
+        for (auto &Succ2 : SU->Succs) {
+          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
+              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
+            LiveRegGens[Reg] = Succ2.getSUnit();
+        }
+      }
     }
   }
   if (SU->getHeight() < MinAvailableCycle)
@@ -951,8 +960,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     else if (VT == MVT::Other)
       TryUnfold = true;
   }
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    const SDValue &Op = N->getOperand(i);
+  for (const SDValue &Op : N->op_values()) {
     MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
     if (VT == MVT::Glue)
       return nullptr;
@@ -1247,10 +1255,9 @@ static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
 
 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
 static const uint32_t *getNodeRegMask(const SDNode *N) {
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-    if (const RegisterMaskSDNode *Op =
-        dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
-      return Op->getRegMask();
+  for (const SDValue &Op : N->op_values())
+    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
+      return RegOp->getRegMask();
   return nullptr;
 }
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index f4c7b59..b22d6ed 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -332,9 +332,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
     SDNode *NI = Worklist.pop_back_val();
 
     // Add all operands to the worklist unless they've already been added.
-    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
-      if (Visited.insert(NI->getOperand(i).getNode()).second)
-        Worklist.push_back(NI->getOperand(i).getNode());
+    for (const SDValue &Op : NI->op_values())
+      if (Visited.insert(Op.getNode()).second)
+        Worklist.push_back(Op.getNode());
 
     if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
       continue;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 4c74182..159c28c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -64,6 +64,7 @@ namespace llvm {
       if (isa<TargetIndexSDNode>(Node))    return true;
       if (isa<JumpTableSDNode>(Node))      return true;
       if (isa<ExternalSymbolSDNode>(Node)) return true;
+      if (isa<MCSymbolSDNode>(Node))       return true;
       if (isa<BlockAddressSDNode>(Node))   return true;
       if (Node->getOpcode() == ISD::EntryToken ||
           isa<MDNodeSDNode>(Node)) return true;
@@ -180,6 +181,6 @@ namespace llvm {
     void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
                          MachineBasicBlock::iterator InsertPos);
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0eff930..be54782 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -187,8 +187,7 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
   if (N->getOpcode() != ISD::BUILD_VECTOR)
     return false;
 
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    SDValue Op = N->getOperand(i);
+  for (const SDValue &Op : N->op_values()) {
     if (Op.getOpcode() == ISD::UNDEF)
       continue;
     if (!isa<ConstantSDNode>(Op))
@@ -203,8 +202,7 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
   if (N->getOpcode() != ISD::BUILD_VECTOR)
     return false;
 
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    SDValue Op = N->getOperand(i);
+  for (const SDValue &Op : N->op_values()) {
     if (Op.getOpcode() == ISD::UNDEF)
       continue;
     if (!isa<ConstantFPSDNode>(Op))
@@ -244,8 +242,8 @@ bool ISD::allOperandsUndef(const SDNode *N) {
   if (N->getNumOperands() == 0)
     return false;
 
-  for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i)
-    if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+  for (const SDValue &Op : N->op_values())
+    if (Op.getOpcode() != ISD::UNDEF)
       return false;
 
   return true;
@@ -427,12 +425,12 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
   AddNodeIDOperands(ID, OpList);
 }
 
-/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
-/// the NodeID data.
+/// If this is an SDNode with special info, add this info to the NodeID data.
 static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   switch (N->getOpcode()) {
   case ISD::TargetExternalSymbol:
   case ISD::ExternalSymbol:
+  case ISD::MCSymbol:
     llvm_unreachable("Should only be used on nodes with operands");
   default: break;  // Normal nodes don't need extra info.
   case ISD::TargetConstant:
@@ -797,6 +795,11 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
                                                     ESN->getTargetFlags()));
     break;
   }
+  case ISD::MCSymbol: {
+    auto *MCSN = cast<MCSymbolSDNode>(N);
+    Erased = MCSymbols.erase(MCSN->getMCSymbol());
+    break;
+  }
   case ISD::VALUETYPE: {
     EVT VT = cast<VTSDNode>(N)->getVT();
     if (VT.isExtended()) {
@@ -1014,6 +1017,7 @@ void SelectionDAG::clear() {
   ExtendedValueTypeNodes.clear();
   ExternalSymbols.clear();
   TargetExternalSymbols.clear();
+  MCSymbols.clear();
   std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
             static_cast<CondCodeSDNode*>(nullptr));
   std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
@@ -1469,6 +1473,15 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
   return SDValue(N, 0);
 }
 
+SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
+  SDNode *&N = MCSymbols[Sym];
+  if (N)
+    return SDValue(N, 0);
+  N = new (NodeAllocator) MCSymbolSDNode(Sym, VT);
+  InsertNode(N);
+  return SDValue(N, 0);
+}
+
 SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
                                               unsigned char TargetFlags) {
   SDNode *&N =
@@ -6134,7 +6147,7 @@ public:
     : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
 };
 
-} // namespace
+}
 
 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
 /// This can cause recursive merging of nodes in the DAG.
@@ -6344,7 +6357,7 @@ namespace {
   bool operator<(const UseMemo &L, const UseMemo &R) {
     return (intptr_t)L.User < (intptr_t)R.User;
   }
-} // namespace
+}
 
 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
 /// uses of other values produced by From.getNode() alone.  The same value
@@ -6589,7 +6602,7 @@ namespace {
         VTs.push_back(MVT((MVT::SimpleValueType)i));
     }
   };
-} // namespace
+}
 
 static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
 static ManagedStatic<EVTArray> SimpleVTArray;
@@ -6659,8 +6672,8 @@ bool SDNode::isOnlyUserOf(SDNode *N) const {
 /// isOperand - Return true if this node is an operand of N.
 ///
 bool SDValue::isOperandOf(SDNode *N) const {
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-    if (*this == N->getOperand(i))
+  for (const SDValue &Op : N->op_values())
+    if (*this == Op)
       return true;
   return false;
 }
@@ -6728,8 +6741,8 @@ SDNode::hasPredecessorHelper(const SDNode *N,
   // Haven't visited N yet. Continue the search.
   while (!Worklist.empty()) {
     const SDNode *M = Worklist.pop_back_val();
-    for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
-      SDNode *Op = M->getOperand(i).getNode();
+    for (const SDValue &OpV : M->op_values()) {
+      SDNode *Op = OpV.getNode();
       if (Visited.insert(Op).second)
         Worklist.push_back(Op);
       if (Op == N)
@@ -7078,8 +7091,8 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
 }
 
 bool BuildVectorSDNode::isConstant() const {
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
-    unsigned Opc = getOperand(i).getOpcode();
+  for (const SDValue &Op : op_values()) {
+    unsigned Opc = Op.getOpcode();
     if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
       return false;
   }
@@ -7120,8 +7133,8 @@ static void checkForCyclesHelper(const SDNode *N,
     abort();
   }
 
-  for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-    checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG);
+  for (const SDValue &Op : N->op_values())
+    checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);
 
   Checked.insert(N);
   Visited.erase(N);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8313a48..4897082 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -261,8 +261,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
     assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
     NumParts = NumRegs; // Silence a compiler warning.
     assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-    assert(RegisterVT == Parts[0].getSimpleValueType() &&
-           "Part type doesn't match part!");
+    assert(RegisterVT.getSizeInBits() ==
+           Parts[0].getSimpleValueType().getSizeInBits() &&
+           "Part type sizes don't match!");
 
     // Assemble the parts into intermediate operands.
     SmallVector<SDValue, 8> Ops(NumIntermediates);
@@ -1445,8 +1446,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
     // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
     // The requirement is that
     //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
-    //     = TrueProb for orignal BB.
-    // Assuming the orignal weights are A and B, one choice is to set BB1's
+    //     = TrueProb for original BB.
+    // Assuming the original weights are A and B, one choice is to set BB1's
     // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
     // assumes that
     //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
@@ -1481,8 +1482,8 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
     // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
     // The requirement is that
     //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
-    //     = FalseProb for orignal BB.
-    // Assuming the orignal weights are A and B, one choice is to set BB1's
+    //     = FalseProb for original BB.
+    // Assuming the original weights are A and B, one choice is to set BB1's
     // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
     // assumes that
     //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
@@ -2238,17 +2239,11 @@ void SelectionDAGBuilder::visitSDiv(const User &I) {
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
 
-  // Turn exact SDivs into multiplications.
-  // FIXME: This should be in DAGCombiner, but it doesn't have access to the
-  // exact bit.
-  if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
-      !isa<ConstantSDNode>(Op1) &&
-      isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
-    setValue(&I, DAG.getTargetLoweringInfo()
-                     .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG));
-  else
-    setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(),
-                             Op1, Op2));
+  SDNodeFlags Flags;
+  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
+                 cast<PossiblyExactOperator>(&I)->isExact());
+  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
+                           Op2, &Flags));
 }
 
 void SelectionDAGBuilder::visitICmp(const User &I) {
@@ -4786,7 +4781,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 
   case Intrinsic::debugtrap:
   case Intrinsic::trap: {
-    StringRef TrapFuncName = TM.Options.getTrapFunctionName();
+    StringRef TrapFuncName =
+        I.getAttributes()
+            .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
+            .getValueAsString();
     if (TrapFuncName.empty()) {
       ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
         ISD::TRAP : ISD::DEBUGTRAP;
@@ -4976,11 +4974,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
             GlobalValue::getRealLinkageName(Fn->getName()), IdxVal);
 
-    // Create a TargetExternalSymbol for the label to avoid any target lowering
+    // Create a MCSymbol for the label to avoid any target lowering
     // that would make this PC relative.
-    StringRef Name = FrameAllocSym->getName();
-    assert(Name.data()[Name.size()] == '\0' && "not null terminated");
-    SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
+    SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
     SDValue OffsetVal =
         DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym);
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 96ee899..ef468a2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -130,6 +130,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::TargetJumpTable:            return "TargetJumpTable";
   case ISD::TargetConstantPool:         return "TargetConstantPool";
   case ISD::TargetExternalSymbol:       return "TargetExternalSymbol";
+  case ISD::MCSymbol:                   return "MCSymbol";
   case ISD::TargetBlockAddress:         return "TargetBlockAddress";
 
   case ISD::CopyToReg:                  return "CopyToReg";
@@ -545,12 +546,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
 }
 
 static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-    if (N->getOperand(i).getNode()->hasOneUse())
-      DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+  for (const SDValue &Op : N->op_values())
+    if (Op.getNode()->hasOneUse())
+      DumpNodes(Op.getNode(), indent+2, G);
     else
       dbgs() << "\n" << std::string(indent+2, ' ')
-             << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+             << (void*)Op.getNode() << ": <multiple use>";
 
   dbgs() << '\n';
   dbgs().indent(indent);
@@ -607,10 +608,8 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
   OS << "\n";
 
   // Dump children that have grandchildren on their own line(s).
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    const SDNode *child = N->getOperand(i).getNode();
-    DumpNodesr(OS, child, indent+2, G, once);
-  }
+  for (const SDValue &Op : N->op_values())
+    DumpNodesr(OS, Op.getNode(), indent+2, G, once);
 }
 
 void SDNode::dumpr() const {
@@ -636,12 +635,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
   if (depth < 1)
     return;
 
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+  for (const SDValue &Op : N->op_values()) {
     // Don't follow chain operands.
-    if (N->getOperand(i).getValueType() == MVT::Other)
+    if (Op.getValueType() == MVT::Other)
       continue;
     OS << '\n';
-    printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+    printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);
   }
 }
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c5562cd..31f8210 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -307,7 +307,7 @@ namespace llvm {
            "Unknown sched type!");
     return createILPListDAGScheduler(IS, OptLevel);
   }
-} // namespace llvm
+}
 
 // EmitInstrWithCustomInserter - This method should be implemented by targets
 // that mark instructions with the 'usesCustomInserter' flag.  These
@@ -637,9 +637,9 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
       continue;
 
     // Otherwise, add all chain operands to the worklist.
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      if (N->getOperand(i).getValueType() == MVT::Other)
-        Worklist.push_back(N->getOperand(i).getNode());
+    for (const SDValue &Op : N->op_values())
+      if (Op.getValueType() == MVT::Other)
+        Worklist.push_back(Op.getNode());
 
     // If this is a CopyToReg with a vreg dest, process it.
     if (N->getOpcode() != ISD::CopyToReg)
@@ -1814,12 +1814,12 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
   if (!Visited.insert(Use).second)
     return false;
 
-  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+  for (const SDValue &Op : Use->op_values()) {
     // Ignore chain uses, they are validated by HandleMergeInputChains.
-    if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+    if (Op.getValueType() == MVT::Other && IgnoreChains)
       continue;
 
-    SDNode *N = Use->getOperand(i).getNode();
+    SDNode *N = Op.getNode();
     if (N == Def) {
       if (Use == ImmedUse || Use == Root)
         continue;  // We are not looking for immediate use.
@@ -2212,10 +2212,10 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
 
     // If we have a token factor, we want to add all inputs of the token factor
     // that are not part of the pattern we're matching.
-    for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+    for (const SDValue &Op : N->op_values()) {
       if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
-                      N->getOperand(op).getNode()))
-        InputChains.push_back(N->getOperand(op));
+                      Op.getNode()))
+        InputChains.push_back(Op);
     }
   }
 
@@ -2542,7 +2542,7 @@ public:
           J.setNode(E);
   }
 };
-} // namespace
+}
 
 SDNode *SelectionDAGISel::
 SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
@@ -2562,6 +2562,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   case ISD::TargetConstantPool:
   case ISD::TargetFrameIndex:
   case ISD::TargetExternalSymbol:
+  case ISD::MCSymbol:
   case ISD::TargetBlockAddress:
   case ISD::TargetJumpTable:
   case ISD::TargetGlobalTLSAddress:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 19b5d16..4df5ede 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -132,7 +132,7 @@ namespace llvm {
                     "color=blue,style=dashed");
     }
   };
-} // namespace llvm
+}
 
 std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
                                                         const SelectionDAG *G) {
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index a6b3fc6..bd40cac 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -289,7 +289,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad,
 
   ImmutableCallSite CS(ISP.getCallSite());
 
-  SDValue ActualCallee = Builder.getValue(ISP.getActualCallee());
+  SDValue ActualCallee = Builder.getValue(ISP.getCalledValue());
 
   assert(CS.getCallingConv() != CallingConv::AnyReg &&
          "anyregcc is not supported on statepoints!");
@@ -815,8 +815,8 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
     // register because statepoint and actuall call return types can be
     // different, and getValue() will use CopyFromReg of the wrong type,
     // which is always i32 in our case.
-    PointerType *CalleeType =
-        cast<PointerType>(ImmutableStatepoint(I).getActualCallee()->getType());
+    PointerType *CalleeType = cast<PointerType>(
+        ImmutableStatepoint(I).getCalledValue()->getType());
     Type *RetTy =
         cast<FunctionType>(CalleeType->getElementType())->getReturnType();
     SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c70c3a2..e7722b3 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -700,6 +700,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       if (ShAmt >= BitWidth)
         break;
 
+      APInt InDemandedMask = (NewMask << ShAmt);
+
+      // If the shift is exact, then it does demand the low bits (and knows that
+      // they are zero).
+      if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
+        InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+
       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
       // single shift.  We can do this if the top bits (which are shifted out)
       // are never demanded.
@@ -722,7 +729,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       }
 
       // Compute the new bits that are at the top now.
-      if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
+      if (SimplifyDemandedBits(InOp, InDemandedMask,
                                KnownZero, KnownOne, TLO, Depth+1))
         return true;
       assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
@@ -753,6 +760,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
       APInt InDemandedMask = (NewMask << ShAmt);
 
+      // If the shift is exact, then it does demand the low bits (and knows that
+      // they are zero).
+      if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
+        InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+
       // If any of the demanded bits are produced by the sign extension, we also
       // demand the input sign bit.
       APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
@@ -771,10 +783,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
       // If the input sign bit is known to be zero, or if none of the top bits
       // are demanded, turn this into an unsigned shift right.
-      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits)
-        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
-                                                 Op.getOperand(0),
-                                                 Op.getOperand(1)));
+      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+        SDNodeFlags Flags;
+        Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact());
+        return TLO.CombineTo(Op,
+                             TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
+                                             Op.getOperand(1), &Flags));
+      }
 
       int Log2 = NewMask.exactLogBase2();
       if (Log2 >= 0) {
@@ -2659,10 +2674,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
 
 /// \brief Given an exact SDIV by a constant, create a multiplication
 /// with the multiplicative inverse of the constant.
-SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
-                                       SelectionDAG &DAG) const {
-  ConstantSDNode *C = cast<ConstantSDNode>(Op2);
-  APInt d = C->getAPIntValue();
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
+                              SDLoc dl, SelectionDAG &DAG,
+                              std::vector<SDNode *> &Created) {
   assert(d != 0 && "Division by zero!");
 
   // Shift the value upfront if it is even, so the LSB is one.
@@ -2670,10 +2684,11 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
   if (ShAmt) {
     // TODO: For UDIV use SRL instead of SRA.
     SDValue Amt =
-        DAG.getConstant(ShAmt, dl, getShiftAmountTy(Op1.getValueType()));
+        DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType()));
     SDNodeFlags Flags;
     Flags.setExact(true);
     Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
+    Created.push_back(Op1.getNode());
     d = d.ashr(ShAmt);
   }
 
@@ -2682,8 +2697,10 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
   while ((t = d*xn) != 1)
     xn *= APInt(d.getBitWidth(), 2) - t;
 
-  Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
-  return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+  SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
+  SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+  Created.push_back(Mul.getNode());
+  return Mul;
 }
 
 /// \brief Given an ISD::SDIV node expressing a divide by constant,
@@ -2703,6 +2720,10 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
   if (!isTypeLegal(VT))
     return SDValue();
 
+  // If the sdiv has an 'exact' bit we can use a simpler lowering.
+  if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact())
+    return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);
+
   APInt::ms magics = Divisor.magic();
 
   // Multiply the numerator (operand 0) by the magic value
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index d60e5f9..e7b2a8e 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -59,7 +59,7 @@ private:
                                       Type *Ty, Value *BasePtr, int Idx1, int Idx2,
                                       const char *Name);
 };
-} // namespace
+}
 
 INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
                       "Shadow Stack GC Lowering", false, false)
@@ -189,7 +189,7 @@ public:
     }
   }
 };
-} // namespace
+}
 
 
 Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index b1019c1..08f99ec 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -37,6 +37,6 @@ namespace llvm {
                                MachineFunction &mf,
                                VirtRegMap &vrm);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 4eaf03e..69c65ff 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -39,7 +39,7 @@ class raw_ostream;
 
 /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
 /// opportunities.
-class SplitAnalysis {
+class LLVM_LIBRARY_VISIBILITY SplitAnalysis {
 public:
   const MachineFunction &MF;
   const VirtRegMap &VRM;
@@ -208,7 +208,7 @@ public:
 /// - Finish the current interval with closeIntv and repeat from 2.
 /// - Rewrite instructions with finish().
 ///
-class SplitEditor {
+class LLVM_LIBRARY_VISIBILITY SplitEditor {
   SplitAnalysis &SA;
   LiveIntervals &LIS;
   VirtRegMap &VRM;
@@ -466,6 +466,6 @@ public:
                         unsigned IntvOut, SlotIndex EnterAfter);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/CodeGen/StatepointExampleGC.cpp b/lib/CodeGen/StatepointExampleGC.cpp
index b9523e5..95dfd75 100644
--- a/lib/CodeGen/StatepointExampleGC.cpp
+++ b/lib/CodeGen/StatepointExampleGC.cpp
@@ -45,7 +45,7 @@ public:
     return (1 == PT->getAddressSpace());
   }
 };
-} // namespace
+}
 
 static GCRegistry::Add<StatepointGC> X("statepoint-example",
                                        "an example strategy for statepoint");
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 164badd..237460c 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -125,7 +125,7 @@ namespace {
   };
 
   char TailDuplicatePass::ID = 0;
-} // namespace
+}
 
 char &llvm::TailDuplicateID = TailDuplicatePass::ID;
 
@@ -627,11 +627,8 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
     return false;
   if (TailBB->pred_empty())
     return false;
-  MachineBasicBlock::iterator I = TailBB->begin();
-  MachineBasicBlock::iterator E = TailBB->end();
-  while (I != E && I->isDebugValue())
-    ++I;
-  if (I == E)
+  MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
+  if (I == TailBB->end())
     return true;
   return I->isUnconditionalBranch();
 }
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 1bc89aa..78492a6 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -38,6 +38,11 @@
 #include <cctype>
 using namespace llvm;
 
+static cl::opt<bool> JumpIsExpensiveOverride(
+    "jump-is-expensive", cl::init(false),
+    cl::desc("Do not create extra branches to split comparison logic."),
+    cl::Hidden);
+
 /// InitLibcallNames - Set default libcall names.
 ///
 static void InitLibcallNames(const char **Names, const Triple &TT) {
@@ -757,7 +762,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
   IntDivIsCheap = false;
   FsqrtIsCheap = false;
   Pow2SDivIsCheap = false;
-  JumpIsExpensive = false;
+  JumpIsExpensive = JumpIsExpensiveOverride;
   PredictableSelectIsExpensive = false;
   MaskAndBranchFoldingIsLegal = false;
   EnableExtLdPromotion = false;
@@ -778,7 +783,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
   InsertFencesForAtomic = false;
   MinimumJumpTableEntries = 4;
 
-  InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple()));
+  InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
   InitCmpLibcallCCs(CmpLibcallCCs);
   InitLibcallCallingConvs(LibcallCallingConvs);
 }
@@ -915,6 +920,12 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
   }
 }
 
+void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
+  // If the command-line option was specified, ignore this request.
+  if (!JumpIsExpensiveOverride.getNumOccurrences())
+    JumpIsExpensive = isExpensive;
+}
+
 TargetLoweringBase::LegalizeKind
 TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
   // If this is a simple type, use the ComputeRegisterProp mechanism.
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d7b043d..2f78763 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -440,16 +440,6 @@ TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
   SupportIndirectSymViaGOTPCRel = true;
 }
 
-/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker
-/// option string. Returns StringRef() if the option does not specify a library.
-StringRef TargetLoweringObjectFileMachO::
-getDepLibFromLinkerOpt(StringRef LinkerOption) const {
-  const char *LibCmd = "-l";
-  if (LinkerOption.startswith(LibCmd))
-    return LinkerOption.substr(strlen(LibCmd));
-  return StringRef();
-}
-
 /// emitModuleFlags - Perform code emission for module flags.
 void TargetLoweringObjectFileMachO::
 emitModuleFlags(MCStreamer &Streamer,
@@ -850,8 +840,6 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
     } else {
       return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
     }
-  } else if (GV->isWeakForLinker()) {
-    return COFF::IMAGE_COMDAT_SELECT_ANY;
   }
   return 0;
 }
@@ -990,14 +978,6 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
                                      COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
 }
 
-StringRef TargetLoweringObjectFileCOFF::
-getDepLibFromLinkerOpt(StringRef LinkerOption) const {
-  const char *LibCmd = "/DEFAULTLIB:";
-  if (LinkerOption.startswith(LibCmd))
-    return LinkerOption.substr(strlen(LibCmd));
-  return StringRef();
-}
-
 void TargetLoweringObjectFileCOFF::
 emitModuleFlags(MCStreamer &Streamer,
                 ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
@@ -1045,3 +1025,36 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
   return getContext().getAssociativeCOFFSection(
       cast<MCSectionCOFF>(StaticDtorSection), KeySym);
 }
+
+void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
+    raw_ostream &OS, const GlobalValue *GV, const Mangler &Mang) const {
+  if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
+    return;
+
+  const Triple &TT = getTargetTriple();
+
+  if (TT.isKnownWindowsMSVCEnvironment())
+    OS << " /EXPORT:";
+  else
+    OS << " -export:";
+
+  if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
+    std::string Flag;
+    raw_string_ostream FlagOS(Flag);
+    Mang.getNameWithPrefix(FlagOS, GV, false);
+    FlagOS.flush();
+    if (Flag[0] == DL->getGlobalPrefix())
+      OS << Flag.substr(1);
+    else
+      OS << Flag;
+  } else {
+    Mang.getNameWithPrefix(OS, GV, false);
+  }
+
+  if (!GV->getValueType()->isFunctionTy()) {
+    if (TT.isKnownWindowsMSVCEnvironment())
+      OS << ",DATA";
+    else
+      OS << ",data";
+  }
+}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index f4926cb..8d2048f 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -47,10 +47,3 @@ bool TargetOptions::LessPreciseFPMAD() const {
 bool TargetOptions::HonorSignDependentRoundingFPMath() const {
   return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
 }
-
-/// getTrapFunctionName - If this returns a non-empty string, this means isel
-/// should lower Intrinsic::trap to a call to the specified function name
-/// instead of an ISD::TRAP node.
-StringRef TargetOptions::getTrapFunctionName() const {
-  return TrapFuncName;
-}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 6bceccc..e84bea6 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1207,12 +1207,24 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
     }
   }
 
+  // If the instruction is convertible to 3 Addr, instead
+  // of returning try 3 Addr transformation aggresively and
+  // use this variable to check later. Because it might be better.
+  // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
+  // instead of the following code.
+  //   addl	%esi, %edi
+  //   movl	%edi, %eax
+  //   ret
+  bool commuted = false;
+
   // If it's profitable to commute, try to do so.
   if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
+    commuted = true;
     ++NumCommuted;
     if (AggressiveCommute)
       ++NumAggrCommuted;
-    return false;
+    if (!MI.isConvertibleTo3Addr())
+      return false;
   }
 
   if (shouldOnlyCommute)
@@ -1220,7 +1232,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
 
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule this MI below it.
-  if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
+  if (!commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
     ++NumReSchedDowns;
     return true;
   }
@@ -1237,6 +1249,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
     }
   }
 
+  // Return if it is commuted but 3 addr conversion is failed.
+  if (commuted)
+    return false;
+
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule it before this MI if it's legal.
   if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) {
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 5c54cdb..d393e10 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -51,7 +51,7 @@ namespace {
       AU.addPreserved<DominatorTreeWrapperPass>();
     }
   };
-} // namespace
+}
 char UnreachableBlockElim::ID = 0;
 INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
                 "Remove unreachable blocks from the CFG", false, false)
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 8c932cf..dbc0d91 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -76,7 +76,7 @@ public:
   WinEHPrepare(const TargetMachine *TM = nullptr)
       : FunctionPass(ID) {
     if (TM)
-      TheTriple = Triple(TM->getTargetTriple());
+      TheTriple = TM->getTargetTriple();
   }
 
   bool runOnFunction(Function &Fn) override;
@@ -106,8 +106,8 @@ private:
                                 LandingPadInst *OutlinedLPad,
                                 const LandingPadInst *OriginalLPad,
                                 FrameVarInfoMap &VarInfo);
-  Function *createHandlerFunc(Type *RetTy, const Twine &Name, Module *M,
-                              Value *&ParentFP);
+  Function *createHandlerFunc(Function *ParentFn, Type *RetTy,
+                              const Twine &Name, Module *M, Value *&ParentFP);
   bool outlineHandler(ActionHandler *Action, Function *SrcFn,
                       LandingPadInst *LPad, BasicBlock *StartBB,
                       FrameVarInfoMap &VarInfo);
@@ -1329,14 +1329,15 @@ void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler) {
 
 // FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering
 // usually doesn't build LLVM IR, so that's probably the wrong place.
-Function *WinEHPrepare::createHandlerFunc(Type *RetTy, const Twine &Name,
-                                          Module *M, Value *&ParentFP) {
+Function *WinEHPrepare::createHandlerFunc(Function *ParentFn, Type *RetTy,
+                                          const Twine &Name, Module *M,
+                                          Value *&ParentFP) {
   // x64 uses a two-argument prototype where the parent FP is the second
   // argument. x86 uses no arguments, just the incoming EBP value.
   LLVMContext &Context = M->getContext();
+  Type *Int8PtrType = Type::getInt8PtrTy(Context);
   FunctionType *FnType;
   if (TheTriple.getArch() == Triple::x86_64) {
-    Type *Int8PtrType = Type::getInt8PtrTy(Context);
     Type *ArgTys[2] = {Int8PtrType, Int8PtrType};
     FnType = FunctionType::get(RetTy, ArgTys, false);
   } else {
@@ -1353,9 +1354,13 @@ Function *WinEHPrepare::createHandlerFunc(Type *RetTy, const Twine &Name,
     assert(M);
     Function *FrameAddressFn =
         Intrinsic::getDeclaration(M, Intrinsic::frameaddress);
-    Value *Args[1] = {ConstantInt::get(Type::getInt32Ty(Context), 1)};
-    ParentFP = CallInst::Create(FrameAddressFn, Args, "parent_fp",
-                                &Handler->getEntryBlock());
+    Function *RecoverFPFn =
+        Intrinsic::getDeclaration(M, Intrinsic::x86_seh_recoverfp);
+    IRBuilder<> Builder(&Handler->getEntryBlock());
+    Value *EBP =
+        Builder.CreateCall(FrameAddressFn, {Builder.getInt32(1)}, "ebp");
+    Value *ParentI8Fn = Builder.CreateBitCast(ParentFn, Int8PtrType);
+    ParentFP = Builder.CreateCall(RecoverFPFn, {ParentI8Fn, EBP});
   }
   return Handler;
 }
@@ -1371,10 +1376,10 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
   Value *ParentFP;
   Function *Handler;
   if (Action->getType() == Catch) {
-    Handler = createHandlerFunc(Int8PtrType, SrcFn->getName() + ".catch", M,
+    Handler = createHandlerFunc(SrcFn, Int8PtrType, SrcFn->getName() + ".catch", M,
                                 ParentFP);
   } else {
-    Handler = createHandlerFunc(Type::getVoidTy(Context),
+    Handler = createHandlerFunc(SrcFn, Type::getVoidTy(Context),
                                 SrcFn->getName() + ".cleanup", M, ParentFP);
   }
   Handler->setPersonalityFn(SrcFn->getPersonalityFn());
@@ -2395,40 +2400,43 @@ void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
           MaybeCall = MaybeCall->getNextNode();
       }
 
-      // Look for outlined finally calls.
-      if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
-        Function *Fin = FinallyCall.getCalledFunction();
-        assert(Fin && "outlined finally call should be direct");
-        auto *Action = new CleanupHandler(BB);
-        Action->setHandlerBlockOrFunc(Fin);
-        Actions.insertCleanupHandler(Action);
-        CleanupHandlerMap[BB] = Action;
-        DEBUG(dbgs() << "  Found frontend-outlined finally call to "
-                     << Fin->getName() << " in block "
-                     << Action->getStartBlock()->getName() << "\n");
-
-        // Split the block if there were more interesting instructions and look
-        // for finally calls in the normal successor block.
-        BasicBlock *SuccBB = BB;
-        if (FinallyCall.getInstruction() != BB->getTerminator() &&
-            FinallyCall.getInstruction()->getNextNode() !=
-                BB->getTerminator()) {
-          SuccBB =
-              SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT);
-        } else {
-          if (FinallyCall.isInvoke()) {
+      // Look for outlined finally calls on x64, since those happen to match the
+      // prototype provided by the runtime.
+      if (TheTriple.getArch() == Triple::x86_64) {
+        if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
+          Function *Fin = FinallyCall.getCalledFunction();
+          assert(Fin && "outlined finally call should be direct");
+          auto *Action = new CleanupHandler(BB);
+          Action->setHandlerBlockOrFunc(Fin);
+          Actions.insertCleanupHandler(Action);
+          CleanupHandlerMap[BB] = Action;
+          DEBUG(dbgs() << "  Found frontend-outlined finally call to "
+                       << Fin->getName() << " in block "
+                       << Action->getStartBlock()->getName() << "\n");
+
+          // Split the block if there were more interesting instructions and
+          // look for finally calls in the normal successor block.
+          BasicBlock *SuccBB = BB;
+          if (FinallyCall.getInstruction() != BB->getTerminator() &&
+              FinallyCall.getInstruction()->getNextNode() !=
+                  BB->getTerminator()) {
             SuccBB =
-                cast<InvokeInst>(FinallyCall.getInstruction())->getNormalDest();
+                SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(), DT);
           } else {
-            SuccBB = BB->getUniqueSuccessor();
-            assert(SuccBB &&
-                   "splitOutlinedFinallyCalls didn't insert a branch");
+            if (FinallyCall.isInvoke()) {
+              SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())
+                           ->getNormalDest();
+            } else {
+              SuccBB = BB->getUniqueSuccessor();
+              assert(SuccBB &&
+                     "splitOutlinedFinallyCalls didn't insert a branch");
+            }
           }
+          BB = SuccBB;
+          if (BB == EndBB)
+            return;
+          continue;
         }
-        BB = SuccBB;
-        if (BB == EndBB)
-          return;
-        continue;
       }
     }
 
@@ -2518,7 +2526,7 @@ struct WinEHNumbering {
   void calculateStateNumbers(const Function &F);
   void findActionRootLPads(const Function &F);
 };
-} // namespace
+}
 
 void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
   WinEHUnwindMapEntry UME;
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index fd33c7d..8ae0543 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -129,4 +129,4 @@ void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
     }
   }
 }
-} // namespace llvm
+}
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 32654f8..c25ddad 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -667,10 +667,8 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
     if (Section.relocation_begin() != Section.relocation_end()) {
       uint64_t SectionSize = RelocatedSection->getSize();
       for (const RelocationRef &Reloc : Section.relocations()) {
-        uint64_t Address;
-        Reloc.getOffset(Address);
-        uint64_t Type;
-        Reloc.getType(Type);
+        uint64_t Address = Reloc.getOffset();
+        uint64_t Type = Reloc.getType();
         uint64_t SymAddr = 0;
         uint64_t SectionLoadAddress = 0;
         object::symbol_iterator Sym = Reloc.getSymbol();
@@ -709,10 +707,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
         object::RelocToApply R(V.visit(Type, Reloc, SymAddr));
         if (V.error()) {
           SmallString<32> Name;
-          std::error_code ec(Reloc.getTypeName(Name));
-          if (ec) {
-            errs() << "Aaaaaa! Nameless relocation! Aaaaaa!\n";
-          }
+          Reloc.getTypeName(Name);
           errs() << "error: failed to compute relocation: "
                  << Name << "\n";
           continue;
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 48e1d55..53a676e 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -61,7 +61,7 @@ ArrayRef<uint8_t> makeFixedFormSizesArrayRef() {
   };
   return makeArrayRef(sizes);
 }
-} // namespace
+}
 
 ArrayRef<uint8_t> DWARFFormValue::getFixedFormSizes(uint8_t AddrSize,
                                                     uint16_t Version) {
diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.h b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
index 84afd37..946a313 100644
--- a/lib/DebugInfo/DWARF/SyntaxHighlighting.h
+++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
@@ -32,8 +32,8 @@ public:
   llvm::raw_ostream& get() { return OS; }
   operator llvm::raw_ostream& () { return OS; }
 };
-} // namespace syntax
-} // namespace dwarf
-} // namespace llvm
+}
+}
+}
 
 #endif
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 8f56de8..0aff327 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -80,7 +80,7 @@ private:
   ArgListType Args;
   ArgListType::const_iterator CurIter;
 };
-} // namespace
+}
 
 PDBSymbolFunc::PDBSymbolFunc(const IPDBSession &PDBSession,
                              std::unique_ptr<IPDBRawSymbol> Symbol)
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index fcee182..af3563f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -63,7 +63,7 @@ private:
   const IPDBSession &Session;
   std::unique_ptr<ArgEnumeratorType> Enumerator;
 };
-} // namespace
+}
 
 PDBSymbolTypeFunctionSig::PDBSymbolTypeFunctionSig(
     const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 94e8090..c2ff8e2 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -181,9 +181,9 @@ uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) {
 
 std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
   MutexGuard locked(lock);
-  Mangler Mang(DL);
+  Mangler Mang;
   SmallString<128> FullName;
-  Mang.getNameWithPrefix(FullName, GV->getName());
+  Mang.getNameWithPrefix(FullName, GV, false);
   return FullName.str();
 }
 
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 08d9d6b..9071440 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -24,6 +24,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/raw_ostream.h"
@@ -107,30 +108,27 @@ void IntelJITEventListener::NotifyObjectEmitted(
   MethodAddressVector Functions;
 
   // Use symbol info to iterate functions in the object.
-  for (symbol_iterator I = DebugObj.symbol_begin(),
-                       E = DebugObj.symbol_end();
-                        I != E;
-                        ++I) {
+  for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
+    SymbolRef Sym = P.first;
     std::vector<LineNumberInfo> LineInfo;
     std::string SourceFileName;
 
-    SymbolRef::Type SymType;
-    if (I->getType(SymType)) continue;
-    if (SymType == SymbolRef::ST_Function) {
-      StringRef  Name;
-      uint64_t   Addr;
-      if (I->getName(Name)) continue;
-      if (I->getAddress(Addr)) continue;
-      uint64_t Size = I->getSize();
+    if (Sym.getType() == SymbolRef::ST_Function) {
+      ErrorOr<StringRef> Name = Sym.getName();
+      if (!Name)
+        continue;
+
+      uint64_t Addr;
+      if (Sym.getAddress(Addr))
+        continue;
+      uint64_t Size = P.second;
 
       // Record this address in a local vector
       Functions.push_back((void*)Addr);
 
       // Build the function loaded notification message
-      iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
-                                           Name.data(),
-                                           Addr,
-                                           Size);
+      iJIT_Method_Load FunctionMessage =
+          FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size);
       if (Context) {
         DILineInfoTable  Lines = Context->getLineInfoForAddressRange(Addr, Size);
         DILineInfoTable::iterator  Begin = Lines.begin();
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index f6cac58..f976641 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -251,6 +251,6 @@ private:  // Helper functions
 
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 87243e4..a7d6705 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -147,8 +147,6 @@ std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) {
 
   legacy::PassManager PM;
 
-  M->setDataLayout(*TM->getDataLayout());
-
   // The RuntimeDyld will take ownership of this shortly
   SmallVector<char, 4096> ObjBufferSV;
   raw_svector_ostream ObjStream(ObjBufferSV);
@@ -195,6 +193,8 @@ void MCJIT::generateCodeForModule(Module *M) {
   if (ObjCache)
     ObjectToLoad = ObjCache->getObject(M);
 
+  M->setDataLayout(*TM->getDataLayout());
+
   // If the cache did not contain a suitable object, compile the object
   if (!ObjectToLoad) {
     ObjectToLoad = emitObject(M);
@@ -264,9 +264,8 @@ void MCJIT::finalizeModule(Module *M) {
 }
 
 RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) {
-  Mangler Mang(TM->getDataLayout());
   SmallString<128> FullName;
-  Mang.getNameWithPrefix(FullName, Name);
+  Mangler::getNameWithPrefix(FullName, Name, *TM->getDataLayout());
   return Dyld.getSymbol(FullName);
 }
 
@@ -369,7 +368,7 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) {
 void *MCJIT::getPointerToFunction(Function *F) {
   MutexGuard locked(lock);
 
-  Mangler Mang(TM->getDataLayout());
+  Mangler Mang;
   SmallString<128> Name;
   TM->getNameWithPrefix(Name, F, Mang);
 
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 7fda1e0..a45173c 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -335,6 +335,6 @@ protected:
                               bool CheckFunctionsOnly);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 23e7662..b720338 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/raw_ostream.h"
@@ -85,17 +86,16 @@ void OProfileJITEventListener::NotifyObjectEmitted(
   const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
 
   // Use symbol info to iterate functions in the object.
-  for (symbol_iterator I = DebugObj.symbol_begin(), E = DebugObj.symbol_end();
-       I != E; ++I) {
-    SymbolRef::Type SymType;
-    if (I->getType(SymType)) continue;
-    if (SymType == SymbolRef::ST_Function) {
+  for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
+    SymbolRef Sym = P.first;
+    if (Sym.getType() == SymbolRef::ST_Function) {
       StringRef  Name;
       uint64_t   Addr;
-      uint64_t   Size;
-      if (I->getName(Name)) continue;
-      if (I->getAddress(Addr)) continue;
-      if (I->getSize(Size)) continue;
+      if (Sym.getName(Name))
+        continue;
+      if (Sym.getAddress(Addr))
+        continue;
+      uint64_t Size = P.second;
 
       if (Wrapper->op_write_native_code(Name.data(), Addr, (void*)Addr, Size)
                         == -1) {
@@ -125,9 +125,7 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
     for (symbol_iterator I = DebugObj.symbol_begin(),
                          E = DebugObj.symbol_end();
          I != E; ++I) {
-      SymbolRef::Type SymType;
-      if (I->getType(SymType)) continue;
-      if (SymType == SymbolRef::ST_Function) {
+      if (I->getType() == SymbolRef::ST_Function) {
         uint64_t   Addr;
         if (I->getAddress(Addr)) continue;
 
diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt
index 1da1642..99fe22c 100644
--- a/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMOrcJIT
   ExecutionUtils.cpp
   IndirectionUtils.cpp
+  NullResolver.cpp
   OrcMCJITReplacement.cpp
   OrcTargetSupport.cpp
 
diff --git a/lib/ExecutionEngine/Orc/NullResolver.cpp b/lib/ExecutionEngine/Orc/NullResolver.cpp
new file mode 100644
index 0000000..57666a9
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/NullResolver.cpp
@@ -0,0 +1,27 @@
+//===---------- NullResolver.cpp - Reject symbol lookup requests ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/NullResolver.h"
+
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+namespace orc {
+
+RuntimeDyld::SymbolInfo NullResolver::findSymbol(const std::string &Name) {
+  llvm_unreachable("Unexpected cross-object symbol reference");
+}
+
+RuntimeDyld::SymbolInfo
+NullResolver::findSymbolInLogicalDylib(const std::string &Name) {
+  llvm_unreachable("Unexpected cross-object symbol reference");
+}
+
+} // End namespace orc.
+} // End namespace llvm.
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index eb39798..7dc5164 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -142,7 +142,6 @@ public:
                     std::unique_ptr<TargetMachine> TM)
       : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)),
         Resolver(*this), ClientResolver(std::move(ClientResolver)),
-        Mang(this->TM->getDataLayout()),
         NotifyObjectLoaded(*this), NotifyFinalized(*this),
         ObjectLayer(NotifyObjectLoaded, NotifyFinalized),
         CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
@@ -311,7 +310,7 @@ private:
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name);
+      Mang.getNameWithPrefix(MangledNameStream, Name, *TM->getDataLayout());
     }
     return MangledName;
   }
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 6d64d68..fa50182 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -118,8 +118,8 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
   if (std::error_code EC = Sym.getAddress(Address))
     return EC;
 
-  if (Address == UnknownAddressOrSize) {
-    Result = UnknownAddressOrSize;
+  if (Address == UnknownAddress) {
+    Result = UnknownAddress;
     return std::error_code();
   }
 
@@ -129,7 +129,7 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
     return EC;
 
   if (SecI == Obj->section_end()) {
-    Result = UnknownAddressOrSize;
+    Result = UnknownAddress;
     return std::error_code();
   }
 
@@ -175,16 +175,16 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
     if (IsCommon)
       CommonSymbols.push_back(*I);
     else {
-      object::SymbolRef::Type SymType;
-      Check(I->getType(SymType));
+      object::SymbolRef::Type SymType = I->getType();
 
       if (SymType == object::SymbolRef::ST_Function ||
           SymType == object::SymbolRef::ST_Data ||
           SymType == object::SymbolRef::ST_Unknown) {
 
-        StringRef Name;
+        ErrorOr<StringRef> NameOrErr = I->getName();
+        Check(NameOrErr.getError());
+        StringRef Name = *NameOrErr;
         uint64_t SectOffset;
-        Check(I->getName(Name));
         Check(getOffset(*I, SectOffset));
         section_iterator SI = Obj.section_end();
         Check(I->getSection(SI));
@@ -267,10 +267,10 @@ computeAllocationSizeForSections(std::vector<uint64_t> &SectionSizes,
   return TotalSize;
 }
 
-static bool isRequiredForExecution(const SectionRef &Section) {
+static bool isRequiredForExecution(const SectionRef Section) {
   const ObjectFile *Obj = Section.getObject();
-  if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
-    return ELFObj->getSectionFlags(Section) & ELF::SHF_ALLOC;
+  if (isa<object::ELFObjectFileBase>(Obj))
+    return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC;
   if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj)) {
     const coff_section *CoffSection = COFFObj->getCOFFSection(Section);
     // Avoid loading zero-sized COFF sections.
@@ -287,12 +287,12 @@ static bool isRequiredForExecution(const SectionRef &Section) {
   
   assert(isa<MachOObjectFile>(Obj));
   return true;
- }
+}
 
-static bool isReadOnlyData(const SectionRef &Section) {
+static bool isReadOnlyData(const SectionRef Section) {
   const ObjectFile *Obj = Section.getObject();
-  if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
-    return !(ELFObj->getSectionFlags(Section) &
+  if (isa<object::ELFObjectFileBase>(Obj))
+    return !(ELFSectionRef(Section).getFlags() &
              (ELF::SHF_WRITE | ELF::SHF_EXECINSTR));
   if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj))
     return ((COFFObj->getCOFFSection(Section)->Characteristics &
@@ -307,10 +307,10 @@ static bool isReadOnlyData(const SectionRef &Section) {
   return false;
 }
 
-static bool isZeroInit(const SectionRef &Section) {
+static bool isZeroInit(const SectionRef Section) {
   const ObjectFile *Obj = Section.getObject();
-  if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
-    return ELFObj->getSectionType(Section) == ELF::SHT_NOBITS;
+  if (isa<object::ELFObjectFileBase>(Obj))
+    return ELFSectionRef(Section).getType() == ELF::SHT_NOBITS;
   if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj))
     return COFFObj->getCOFFSection(Section)->Characteristics &
             COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
@@ -387,7 +387,7 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
     uint32_t Flags = I->getFlags();
     if (Flags & SymbolRef::SF_Common) {
       // Add the common symbols to a list.  We'll allocate them all below.
-      uint64_t Size = I->getSize();
+      uint64_t Size = I->getCommonSize();
       CommonSize += Size;
     }
   }
@@ -482,8 +482,9 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
   DEBUG(dbgs() << "Processing common symbols...\n");
 
   for (const auto &Sym : CommonSymbols) {
-    StringRef Name;
-    Check(Sym.getName(Name));
+    ErrorOr<StringRef> NameOrErr = Sym.getName();
+    Check(NameOrErr.getError());
+    StringRef Name = *NameOrErr;
 
     // Skip common symbols already elsewhere.
     if (GlobalSymbolTable.count(Name) ||
@@ -494,7 +495,7 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
     }
 
     uint32_t Align = Sym.getAlignment();
-    uint64_t Size = Sym.getSize();
+    uint64_t Size = Sym.getCommonSize();
 
     CommonSize += Align + Size;
     SymbolsToAllocate.push_back(Sym);
@@ -516,9 +517,10 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
   // Assign the address of each symbol
   for (auto &Sym : SymbolsToAllocate) {
     uint32_t Align = Sym.getAlignment();
-    StringRef Name;
-    uint64_t Size = Sym.getSize();
-    Check(Sym.getName(Name));
+    uint64_t Size = Sym.getCommonSize();
+    ErrorOr<StringRef> NameOrErr = Sym.getName();
+    Check(NameOrErr.getError());
+    StringRef Name = *NameOrErr;
     if (Align) {
       // This symbol has an alignment requirement.
       uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 9f80e5a..1dacc13 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -36,7 +36,7 @@ public:
     return OwningBinary<ObjectFile>();
   }
 };
-} // namespace
+}
 
 namespace llvm {
 
@@ -62,23 +62,8 @@ RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) {
 }
 
 uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) {
-  uint64_t Address;
-  if (Sym.getAddress(Address))
-    return UnknownAddressOrSize;
-
-  if (Address == UnknownAddressOrSize)
-    return UnknownAddressOrSize;
-
-  const ObjectFile *Obj = Sym.getObject();
-  section_iterator SecI(Obj->section_end());
-  if (Sym.getSection(SecI))
-    return UnknownAddressOrSize;
-
-  if (SecI == Obj->section_end())
-    return UnknownAddressOrSize;
-
-  uint64_t SectionAddress = SecI->getAddress();
-  return Address - SectionAddress;
+  // The value in a relocatable COFF object is the offset.
+  return Sym.getValue();
 }
 
 bool RuntimeDyldCOFF::isCompatibleFile(const object::ObjectFile &Obj) const {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index c8c2516..957571b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -673,7 +673,7 @@ private:
     return (S == MCDisassembler::Success);
   }
 };
-} // namespace llvm
+}
 
 RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld,
                                                MCDisassembler *Disassembler,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index a0a1118..69d2a7d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -72,6 +72,6 @@ private:
 
   StubMap Stubs;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 967d7c0..f5069c0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -630,7 +630,7 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section,
   }
   case ELF::R_MIPS_PC16: {
     uint64_t FinalAddress = (Section.LoadAddress + Offset);
-    return ((Value + Addend - FinalAddress - 4) >> 2) & 0xffff;
+    return ((Value + Addend - FinalAddress) >> 2) & 0xffff;
   }
   case ELF::R_MIPS_PC32: {
     uint64_t FinalAddress = (Section.LoadAddress + Offset);
@@ -767,23 +767,20 @@ void RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj,
     if (RelSectionName != ".opd")
       continue;
 
-    for (relocation_iterator i = si->relocation_begin(),
-                             e = si->relocation_end();
+    for (elf_relocation_iterator i = si->relocation_begin(),
+                                 e = si->relocation_end();
          i != e;) {
       // The R_PPC64_ADDR64 relocation indicates the first field
       // of a .opd entry
-      uint64_t TypeFunc;
-      check(i->getType(TypeFunc));
+      uint64_t TypeFunc = i->getType();
       if (TypeFunc != ELF::R_PPC64_ADDR64) {
         ++i;
         continue;
       }
 
-      uint64_t TargetSymbolOffset;
+      uint64_t TargetSymbolOffset = i->getOffset();
       symbol_iterator TargetSymbol = i->getSymbol();
-      check(i->getOffset(TargetSymbolOffset));
-      ErrorOr<int64_t> AddendOrErr =
-          Obj.getRelocationAddend(i->getRawDataRefImpl());
+      ErrorOr<int64_t> AddendOrErr = i->getAddend();
       Check(AddendOrErr.getError());
       int64_t Addend = *AddendOrErr;
 
@@ -792,8 +789,7 @@ void RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj,
         break;
 
       // Just check if following relocation is a R_PPC64_TOC
-      uint64_t TypeTOC;
-      check(i->getType(TypeTOC));
+      uint64_t TypeTOC = i->getType();
       if (TypeTOC != ELF::R_PPC64_TOC)
         continue;
 
@@ -1061,17 +1057,19 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
     unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
     ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) {
   const auto &Obj = cast<ELFObjectFileBase>(O);
-  uint64_t RelType;
-  Check(RelI->getType(RelType));
-  int64_t Addend = 0;
-  if (Obj.hasRelocationAddend(RelI->getRawDataRefImpl()))
-    Addend = *Obj.getRelocationAddend(RelI->getRawDataRefImpl());
-  symbol_iterator Symbol = RelI->getSymbol();
+  uint64_t RelType = RelI->getType();
+  ErrorOr<int64_t> AddendOrErr = ELFRelocationRef(*RelI).getAddend();
+  int64_t Addend = AddendOrErr ? *AddendOrErr : 0;
+  elf_symbol_iterator Symbol = RelI->getSymbol();
 
   // Obtain the symbol name which is referenced in the relocation
   StringRef TargetName;
-  if (Symbol != Obj.symbol_end())
-    Symbol->getName(TargetName);
+  if (Symbol != Obj.symbol_end()) {
+    ErrorOr<StringRef> TargetNameOrErr = Symbol->getName();
+    if (std::error_code EC = TargetNameOrErr.getError())
+      report_fatal_error(EC.message());
+    TargetName = *TargetNameOrErr;
+  }
   DEBUG(dbgs() << "\t\tRelType: " << RelType << " Addend: " << Addend
                << " TargetName: " << TargetName << "\n");
   RelocationValueRef Value;
@@ -1082,7 +1080,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
   RTDyldSymbolTable::const_iterator gsi = GlobalSymbolTable.end();
   if (Symbol != Obj.symbol_end()) {
     gsi = GlobalSymbolTable.find(TargetName.data());
-    Symbol->getType(SymType);
+    SymType = Symbol->getType();
   }
   if (gsi != GlobalSymbolTable.end()) {
     const auto &SymInfo = gsi->second;
@@ -1124,8 +1122,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
     }
   }
 
-  uint64_t Offset;
-  Check(RelI->getOffset(Offset));
+  uint64_t Offset = RelI->getOffset();
 
   DEBUG(dbgs() << "\t\tSectionID: " << SectionID << " Offset: " << Offset
                << "\n");
@@ -1312,8 +1309,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
         } else {
           // In the ELFv2 ABI, a function symbol may provide a local entry
           // point, which must be used for direct calls.
-          uint8_t SymOther;
-          Symbol->getOther(SymOther);
+          uint8_t SymOther = Symbol->getOther();
           Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther);
         }
         uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index f7a4fcc..74b13d6 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -39,7 +39,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 namespace llvm {
 
@@ -63,8 +63,10 @@ RelocationValueRef RuntimeDyldMachO::getRelocationValueRef(
   bool IsExternal = Obj.getPlainRelocationExternal(RelInfo);
   if (IsExternal) {
     symbol_iterator Symbol = RI->getSymbol();
-    StringRef TargetName;
-    Symbol->getName(TargetName);
+    ErrorOr<StringRef> TargetNameOrErr = Symbol->getName();
+    if (std::error_code EC = TargetNameOrErr.getError())
+      report_fatal_error(EC.message());
+    StringRef TargetName = *TargetNameOrErr;
     RTDyldSymbolTable::const_iterator SI =
       GlobalSymbolTable.find(TargetName.data());
     if (SI != GlobalSymbolTable.end()) {
@@ -97,9 +99,8 @@ void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value,
 
   bool IsPCRel = Obj.getAnyRelocationPCRel(RelInfo);
   if (IsPCRel) {
-    uint64_t RelocAddr = 0;
-    RI->getAddress(RelocAddr);
-    Value.Offset += RelocAddr + OffsetToNextPC;
+    ErrorOr<uint64_t> RelocAddr = RI->getAddress();
+    Value.Offset += *RelocAddr + OffsetToNextPC;
   }
 }
 
@@ -163,8 +164,10 @@ void RuntimeDyldMachO::populateIndirectSymbolPointersSection(
     unsigned SymbolIndex =
       Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
     symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
-    StringRef IndirectSymbolName;
-    SI->getName(IndirectSymbolName);
+    ErrorOr<StringRef> IndirectSymbolNameOrErr = SI->getName();
+    if (std::error_code EC = IndirectSymbolNameOrErr.getError())
+      report_fatal_error(EC.message());
+    StringRef IndirectSymbolName = *IndirectSymbolNameOrErr;
     DEBUG(dbgs() << "  " << IndirectSymbolName << ": index " << SymbolIndex
           << ", PT offset: " << PTEntryOffset << "\n");
     RelocationEntry RE(PTSectionID, PTEntryOffset,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 45a94ba..36ba8d1 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -72,8 +72,7 @@ protected:
 
     bool IsPCRel = Obj.getAnyRelocationPCRel(RelInfo);
     unsigned Size = Obj.getAnyRelocationLength(RelInfo);
-    uint64_t Offset;
-    RI->getOffset(Offset);
+    uint64_t Offset = RI->getOffset();
     MachO::RelocationInfoType RelType =
       static_cast<MachO::RelocationInfoType>(Obj.getAnyRelocationType(RelInfo));
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index 478665e..408227e 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -125,10 +125,8 @@ public:
     const bool IsExtern = SecI == Obj.section_end();
 
     // Determine the Addend used to adjust the relocation value.
-    uint64_t RelType;
-    Check(RelI->getType(RelType));
-    uint64_t Offset;
-    Check(RelI->getOffset(Offset));
+    uint64_t RelType = RelI->getType();
+    uint64_t Offset = RelI->getOffset();
     uint64_t Addend = 0;
     SectionEntry &Section = Sections[SectionID];
     uintptr_t ObjTarget = Section.ObjAddress + Offset;
@@ -157,8 +155,10 @@ public:
       break;
     }
 
-    StringRef TargetName;
-    Symbol->getName(TargetName);
+    ErrorOr<StringRef> TargetNameOrErr = Symbol->getName();
+    if (std::error_code EC = TargetNameOrErr.getError())
+      report_fatal_error(EC.message());
+    StringRef TargetName = *TargetNameOrErr;
 
     DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset
                  << " RelType: " << RelType << " TargetName: " << TargetName
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 5149d01..99fd6e3 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -400,7 +400,7 @@ private:
     addRelocationForSection(TargetRE, RE.SectionID);
   }
 };
-} // namespace llvm
+}
 
 #undef DEBUG_TYPE
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 8600763..0d9445e 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -220,8 +220,7 @@ private:
     SectionEntry &Section = Sections[SectionID];
     uint32_t RelocType = MachO.getAnyRelocationType(RE);
     bool IsPCRel = MachO.getAnyRelocationPCRel(RE);
-    uint64_t Offset;
-    RelI->getOffset(Offset);
+    uint64_t Offset = RelI->getOffset();
     uint8_t *LocalAddress = Section.Address + Offset;
     int64_t Immediate = readBytesUnaligned(LocalAddress, 4); // Copy the whole instruction out.
     Immediate = ((Immediate >> 4) & 0xf000) | (Immediate & 0xfff);
@@ -272,7 +271,7 @@ private:
   }
 
 };
-} // namespace llvm
+}
 
 #undef DEBUG_TYPE
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index f36f940..aceb304 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -138,8 +138,7 @@ private:
     uint32_t RelocType = Obj.getAnyRelocationType(RE);
     bool IsPCRel = Obj.getAnyRelocationPCRel(RE);
     unsigned Size = Obj.getAnyRelocationLength(RE);
-    uint64_t Offset;
-    RelI->getOffset(Offset);
+    uint64_t Offset = RelI->getOffset();
     uint8_t *LocalAddress = Section.Address + Offset;
     unsigned NumBytes = 1 << Size;
     uint64_t Addend = readBytesUnaligned(LocalAddress, NumBytes);
@@ -197,8 +196,7 @@ private:
     uint32_t RelocType = Obj.getAnyRelocationType(RE);
     bool IsPCRel = Obj.getAnyRelocationPCRel(RE);
     unsigned Size = Obj.getAnyRelocationLength(RE);
-    uint64_t Offset;
-    RelI->getOffset(Offset);
+    uint64_t Offset = RelI->getOffset();
     uint8_t *LocalAddress = Section.Address + Offset;
     unsigned NumBytes = 1 << Size;
     int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes);
@@ -242,19 +240,20 @@ private:
       unsigned SymbolIndex =
           Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
       symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
-      StringRef IndirectSymbolName;
-      SI->getName(IndirectSymbolName);
+      ErrorOr<StringRef> IndirectSymbolName = SI->getName();
+      if (std::error_code EC = IndirectSymbolName.getError())
+        report_fatal_error(EC.message());
       uint8_t *JTEntryAddr = JTSectionAddr + JTEntryOffset;
       createStubFunction(JTEntryAddr);
       RelocationEntry RE(JTSectionID, JTEntryOffset + 1,
                          MachO::GENERIC_RELOC_VANILLA, 0, true, 2);
-      addRelocationForSymbol(RE, IndirectSymbolName);
+      addRelocationForSymbol(RE, *IndirectSymbolName);
       JTEntryOffset += JTEntrySize;
     }
   }
 
 };
-} // namespace llvm
+}
 
 #undef DEBUG_TYPE
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 419b27a..4b3b01b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -131,7 +131,7 @@ private:
     resolveRelocation(TargetRE, (uint64_t)Addr);
   }
 };
-} // namespace llvm
+}
 
 #undef DEBUG_TYPE
 
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index bc35cb3..adc620d 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/IR/TypeFinder.h"
@@ -67,7 +68,7 @@ struct OrderMap {
     IDs[V].first = ID;
   }
 };
-} // namespace
+}
 
 static void orderValue(const Value *V, OrderMap &OM) {
   if (OM.lookup(V).first)
@@ -544,7 +545,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
     OS << '>';
 }
 
-namespace {
+namespace llvm {
 //===----------------------------------------------------------------------===//
 // SlotTracker Class: Enumerate slot numbers for unnamed values
 //===----------------------------------------------------------------------===//
@@ -663,7 +664,32 @@ private:
   SlotTracker(const SlotTracker &) = delete;
   void operator=(const SlotTracker &) = delete;
 };
-} // namespace
+} // namespace llvm
+
+ModuleSlotTracker::ModuleSlotTracker(SlotTracker &Machine, const Module *M,
+                                     const Function *F)
+    : M(M), F(F), Machine(&Machine) {}
+
+ModuleSlotTracker::ModuleSlotTracker(const Module *M,
+                                     bool ShouldInitializeAllMetadata)
+    : MachineStorage(M ? new SlotTracker(M, ShouldInitializeAllMetadata)
+                       : nullptr),
+      M(M), Machine(MachineStorage.get()) {}
+
+ModuleSlotTracker::~ModuleSlotTracker() {}
+
+void ModuleSlotTracker::incorporateFunction(const Function &F) {
+  if (!Machine)
+    return;
+
+  // Nothing to do if this is the right function already.
+  if (this->F == &F)
+    return;
+  if (this->F)
+    Machine->purgeFunction();
+  Machine->incorporateFunction(&F);
+  this->F = &F;
+}
 
 static SlotTracker *createSlotTracker(const Module *M) {
   return new SlotTracker(M);
@@ -1697,6 +1723,20 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
   Out << ")";
 }
 
+static void writeDIModule(raw_ostream &Out, const DIModule *N,
+                          TypePrinting *TypePrinter, SlotTracker *Machine,
+                          const Module *Context) {
+  Out << "!DIModule(";
+  MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+  Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
+  Printer.printString("name", N->getName());
+  Printer.printString("configMacros", N->getConfigurationMacros());
+  Printer.printString("includePath", N->getIncludePath());
+  Printer.printString("isysroot", N->getISysRoot());
+  Out << ")";
+}
+
+
 static void writeDITemplateTypeParameter(raw_ostream &Out,
                                          const DITemplateTypeParameter *N,
                                          TypePrinting *TypePrinter,
@@ -1915,8 +1955,11 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
                                    SlotTracker *Machine, const Module *Context,
                                    bool FromValue) {
   if (const MDNode *N = dyn_cast<MDNode>(MD)) {
-    if (!Machine)
-      Machine = new SlotTracker(Context);
+    std::unique_ptr<SlotTracker> MachineStorage;
+    if (!Machine) {
+      MachineStorage = make_unique<SlotTracker>(Context);
+      Machine = MachineStorage.get();
+    }
     int Slot = Machine->getMetadataSlot(N);
     if (Slot == -1)
       // Give the pointer value instead of "badref", since this comes up all
@@ -1948,7 +1991,7 @@ namespace {
 class AssemblyWriter {
   formatted_raw_ostream &Out;
   const Module *TheModule;
-  std::unique_ptr<SlotTracker> ModuleSlotTracker;
+  std::unique_ptr<SlotTracker> SlotTrackerStorage;
   SlotTracker &Machine;
   TypePrinting TypePrinter;
   AssemblyAnnotationWriter *AnnotationWriter;
@@ -2038,8 +2081,8 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
 AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
                                AssemblyAnnotationWriter *AAW,
                                bool ShouldPreserveUseListOrder)
-    : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)),
-      Machine(*ModuleSlotTracker), AnnotationWriter(AAW),
+    : Out(o), TheModule(M), SlotTrackerStorage(createSlotTracker(M)),
+      Machine(*SlotTrackerStorage), AnnotationWriter(AAW),
       ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
   init();
 }
@@ -3164,21 +3207,35 @@ static bool isReferencingMDNode(const Instruction &I) {
 }
 
 void Value::print(raw_ostream &ROS) const {
+  bool ShouldInitializeAllMetadata = false;
+  if (auto *I = dyn_cast<Instruction>(this))
+    ShouldInitializeAllMetadata = isReferencingMDNode(*I);
+  else if (isa<Function>(this) || isa<MetadataAsValue>(this))
+    ShouldInitializeAllMetadata = true;
+
+  ModuleSlotTracker MST(getModuleFromVal(this), ShouldInitializeAllMetadata);
+  print(ROS, MST);
+}
+
+void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
   formatted_raw_ostream OS(ROS);
+  SlotTracker EmptySlotTable(static_cast<const Module *>(nullptr));
+  SlotTracker &SlotTable =
+      MST.getMachine() ? *MST.getMachine() : EmptySlotTable;
+  auto incorporateFunction = [&](const Function *F) {
+    if (F)
+      MST.incorporateFunction(*F);
+  };
+
   if (const Instruction *I = dyn_cast<Instruction>(this)) {
-    const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr;
-    SlotTracker SlotTable(
-        F,
-        /* ShouldInitializeAllMetadata */ isReferencingMDNode(*I));
+    incorporateFunction(I->getParent() ? I->getParent()->getParent() : nullptr);
     AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr);
     W.printInstruction(*I);
   } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
-    SlotTracker SlotTable(BB->getParent());
+    incorporateFunction(BB->getParent());
     AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr);
     W.printBasicBlock(BB);
   } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
-    SlotTracker SlotTable(GV->getParent(),
-                          /* ShouldInitializeAllMetadata */ isa<Function>(GV));
     AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr);
     if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
       W.printGlobal(V);
@@ -3187,69 +3244,108 @@ void Value::print(raw_ostream &ROS) const {
     else
       W.printAlias(cast<GlobalAlias>(GV));
   } else if (const MetadataAsValue *V = dyn_cast<MetadataAsValue>(this)) {
-    V->getMetadata()->print(ROS, getModuleFromVal(V));
+    V->getMetadata()->print(ROS, MST, getModuleFromVal(V));
   } else if (const Constant *C = dyn_cast<Constant>(this)) {
     TypePrinting TypePrinter;
     TypePrinter.print(C->getType(), OS);
     OS << ' ';
-    WriteConstantInternal(OS, C, TypePrinter, nullptr, nullptr);
+    WriteConstantInternal(OS, C, TypePrinter, MST.getMachine(), nullptr);
   } else if (isa<InlineAsm>(this) || isa<Argument>(this)) {
-    this->printAsOperand(OS);
+    this->printAsOperand(OS, /* PrintType */ true, MST);
   } else {
     llvm_unreachable("Unknown value to print out!");
   }
 }
 
-void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) const {
-  // Fast path: Don't construct and populate a TypePrinting object if we
-  // won't be needing any types printed.
-  bool IsMetadata = isa<MetadataAsValue>(this);
-  if (!PrintType && ((!isa<Constant>(this) && !IsMetadata) || hasName() ||
-                     isa<GlobalValue>(this))) {
-    WriteAsOperandInternal(O, this, nullptr, nullptr, M);
-    return;
+/// Print without a type, skipping the TypePrinting object.
+///
+/// \return \c true iff printing was succesful.
+static bool printWithoutType(const Value &V, raw_ostream &O,
+                             SlotTracker *Machine, const Module *M) {
+  if (V.hasName() || isa<GlobalValue>(V) ||
+      (!isa<Constant>(V) && !isa<MetadataAsValue>(V))) {
+    WriteAsOperandInternal(O, &V, nullptr, Machine, M);
+    return true;
   }
+  return false;
+}
 
-  if (!M)
-    M = getModuleFromVal(this);
-
+static void printAsOperandImpl(const Value &V, raw_ostream &O, bool PrintType,
+                               ModuleSlotTracker &MST) {
   TypePrinting TypePrinter;
-  if (M)
+  if (const Module *M = MST.getModule())
     TypePrinter.incorporateTypes(*M);
   if (PrintType) {
-    TypePrinter.print(getType(), O);
+    TypePrinter.print(V.getType(), O);
     O << ' ';
   }
 
-  SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ IsMetadata);
-  WriteAsOperandInternal(O, this, &TypePrinter, &Machine, M);
+  WriteAsOperandInternal(O, &V, &TypePrinter, MST.getMachine(),
+                         MST.getModule());
+}
+
+void Value::printAsOperand(raw_ostream &O, bool PrintType,
+                           const Module *M) const {
+  if (!M)
+    M = getModuleFromVal(this);
+
+  if (!PrintType)
+    if (printWithoutType(*this, O, nullptr, M))
+      return;
+
+  SlotTracker Machine(
+      M, /* ShouldInitializeAllMetadata */ isa<MetadataAsValue>(this));
+  ModuleSlotTracker MST(Machine, M);
+  printAsOperandImpl(*this, O, PrintType, MST);
+}
+
+void Value::printAsOperand(raw_ostream &O, bool PrintType,
+                           ModuleSlotTracker &MST) const {
+  if (!PrintType)
+    if (printWithoutType(*this, O, MST.getMachine(), MST.getModule()))
+      return;
+
+  printAsOperandImpl(*this, O, PrintType, MST);
 }
 
 static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD,
-                              const Module *M, bool OnlyAsOperand) {
+                              ModuleSlotTracker &MST, const Module *M,
+                              bool OnlyAsOperand) {
   formatted_raw_ostream OS(ROS);
 
-  auto *N = dyn_cast<MDNode>(&MD);
   TypePrinting TypePrinter;
-  SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ N);
   if (M)
     TypePrinter.incorporateTypes(*M);
 
-  WriteAsOperandInternal(OS, &MD, &TypePrinter, &Machine, M,
+  WriteAsOperandInternal(OS, &MD, &TypePrinter, MST.getMachine(), M,
                          /* FromValue */ true);
+
+  auto *N = dyn_cast<MDNode>(&MD);
   if (OnlyAsOperand || !N)
     return;
 
   OS << " = ";
-  WriteMDNodeBodyInternal(OS, N, &TypePrinter, &Machine, M);
+  WriteMDNodeBodyInternal(OS, N, &TypePrinter, MST.getMachine(), M);
 }
 
 void Metadata::printAsOperand(raw_ostream &OS, const Module *M) const {
-  printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ true);
+  ModuleSlotTracker MST(M, isa<MDNode>(this));
+  printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ true);
+}
+
+void Metadata::printAsOperand(raw_ostream &OS, ModuleSlotTracker &MST,
+                              const Module *M) const {
+  printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ true);
 }
 
 void Metadata::print(raw_ostream &OS, const Module *M) const {
-  printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ false);
+  ModuleSlotTracker MST(M, isa<MDNode>(this));
+  printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
+}
+
+void Metadata::print(raw_ostream &OS, ModuleSlotTracker &MST,
+                     const Module *M) const {
+  printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
 }
 
 // Value::dump - allow easy printing of Values from the debugger.
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 8159dce..6f338ae 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -278,6 +278,6 @@ static_assert(
         AlignOf<AttributeSetImpl::IndexAttrPair>::Alignment,
     "Alignment is insufficient for objects appended to AttributeSetImpl");
 
-} // namespace llvm
+} // end llvm namespace
 
 #endif
diff --git a/lib/IR/ConstantFold.h b/lib/IR/ConstantFold.h
index 715c429..42a9c6b 100644
--- a/lib/IR/ConstantFold.h
+++ b/lib/IR/ConstantFold.h
@@ -55,6 +55,6 @@ namespace llvm {
                                       ArrayRef<Constant *> Idxs);
   Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds,
                                       ArrayRef<Value *> Idxs);
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 76c55b6..308e6bd 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -276,8 +276,19 @@ Constant *Constant::getAggregateElement(Constant *Elt) const {
   return nullptr;
 }
 
+void Constant::destroyConstant() {
+  /// First call destroyConstantImpl on the subclass.  This gives the subclass
+  /// a chance to remove the constant from any maps/pools it's contained in.
+  switch (getValueID()) {
+  default:
+    llvm_unreachable("Not a constant!");
+#define HANDLE_CONSTANT(Name)                                                  \
+  case Value::Name##Val:                                                       \
+    cast<Name>(this)->destroyConstantImpl();                                   \
+    break;
+#include "llvm/IR/Value.def"
+  }
 
-void Constant::destroyConstantImpl() {
   // When a Constant is destroyed, there may be lingering
   // references to the constant by other constants in the constant pool.  These
   // constants are implicitly dependent on the module that is being deleted,
@@ -287,11 +298,11 @@ void Constant::destroyConstantImpl() {
   //
   while (!use_empty()) {
     Value *V = user_back();
-#ifndef NDEBUG      // Only in -g mode...
+#ifndef NDEBUG // Only in -g mode...
     if (!isa<Constant>(V)) {
       dbgs() << "While deleting: " << *this
-             << "\n\nUse still stuck around after Def is destroyed: "
-             << *V << "\n\n";
+             << "\n\nUse still stuck around after Def is destroyed: " << *V
+             << "\n\n";
     }
 #endif
     assert(isa<Constant>(V) && "References remain to Constant being destroyed");
@@ -608,6 +619,11 @@ ConstantInt *ConstantInt::get(IntegerType* Ty, StringRef Str,
   return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
 }
 
+/// Remove the constant from the constant table.
+void ConstantInt::destroyConstantImpl() {
+  llvm_unreachable("You can't ConstantInt->destroyConstantImpl()!");
+}
+
 //===----------------------------------------------------------------------===//
 //                                ConstantFP
 //===----------------------------------------------------------------------===//
@@ -743,6 +759,11 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const {
   return Val.bitwiseIsEqual(V);
 }
 
+/// Remove the constant from the constant table.
+void ConstantFP::destroyConstantImpl() {
+  llvm_unreachable("You can't ConstantInt->destroyConstantImpl()!");
+}
+
 //===----------------------------------------------------------------------===//
 //                   ConstantAggregateZero Implementation
 //===----------------------------------------------------------------------===//
@@ -1366,16 +1387,14 @@ ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
 
 /// destroyConstant - Remove the constant from the constant table.
 ///
-void ConstantAggregateZero::destroyConstant() {
+void ConstantAggregateZero::destroyConstantImpl() {
   getContext().pImpl->CAZConstants.erase(getType());
-  destroyConstantImpl();
 }
 
 /// destroyConstant - Remove the constant from the constant table...
 ///
-void ConstantArray::destroyConstant() {
+void ConstantArray::destroyConstantImpl() {
   getType()->getContext().pImpl->ArrayConstants.remove(this);
-  destroyConstantImpl();
 }
 
 
@@ -1384,16 +1403,14 @@ void ConstantArray::destroyConstant() {
 
 // destroyConstant - Remove the constant from the constant table...
 //
-void ConstantStruct::destroyConstant() {
+void ConstantStruct::destroyConstantImpl() {
   getType()->getContext().pImpl->StructConstants.remove(this);
-  destroyConstantImpl();
 }
 
 // destroyConstant - Remove the constant from the constant table...
 //
-void ConstantVector::destroyConstant() {
+void ConstantVector::destroyConstantImpl() {
   getType()->getContext().pImpl->VectorConstants.remove(this);
-  destroyConstantImpl();
 }
 
 /// getSplatValue - If this is a splat vector constant, meaning that all of
@@ -1432,7 +1449,6 @@ const APInt &Constant::getUniqueInteger() const {
   return cast<ConstantInt>(C)->getValue();
 }
 
-
 //---- ConstantPointerNull::get() implementation.
 //
 
@@ -1446,10 +1462,8 @@ ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
 
 // destroyConstant - Remove the constant from the constant table...
 //
-void ConstantPointerNull::destroyConstant() {
+void ConstantPointerNull::destroyConstantImpl() {
   getContext().pImpl->CPNConstants.erase(getType());
-  // Free the constant and any dangling references to it.
-  destroyConstantImpl();
 }
 
 
@@ -1466,10 +1480,9 @@ UndefValue *UndefValue::get(Type *Ty) {
 
 // destroyConstant - Remove the constant from the constant table.
 //
-void UndefValue::destroyConstant() {
+void UndefValue::destroyConstantImpl() {
   // Free the constant and any dangling references to it.
   getContext().pImpl->UVConstants.erase(getType());
-  destroyConstantImpl();
 }
 
 //---- BlockAddress::get() implementation.
@@ -1512,14 +1525,13 @@ BlockAddress *BlockAddress::lookup(const BasicBlock *BB) {
 
 // destroyConstant - Remove the constant from the constant table.
 //
-void BlockAddress::destroyConstant() {
+void BlockAddress::destroyConstantImpl() {
   getFunction()->getType()->getContext().pImpl
     ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
   getBasicBlock()->AdjustBlockAddressRefCount(-1);
-  destroyConstantImpl();
 }
 
-void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+Value *BlockAddress::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
   // This could be replacing either the Basic Block or the Function.  In either
   // case, we have to remove the map entry.
   Function *NewF = getFunction();
@@ -1534,10 +1546,8 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
   // and return early.
   BlockAddress *&NewBA =
     getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
-  if (NewBA) {
-    replaceUsesOfWithOnConstantImpl(NewBA);
-    return;
-  }
+  if (NewBA)
+    return NewBA;
 
   getBasicBlock()->AdjustBlockAddressRefCount(-1);
 
@@ -1549,6 +1559,10 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
   setOperand(0, NewF);
   setOperand(1, NewBB);
   getBasicBlock()->AdjustBlockAddressRefCount(1);
+
+  // If we just want to keep the existing value, then return null.
+  // Callers know that this means we shouldn't delete this value.
+  return nullptr;
 }
 
 //---- ConstantExpr::get() implementations.
@@ -2372,9 +2386,8 @@ Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
 
 // destroyConstant - Remove the constant from the constant table...
 //
-void ConstantExpr::destroyConstant() {
+void ConstantExpr::destroyConstantImpl() {
   getType()->getContext().pImpl->ExprConstants.remove(this);
-  destroyConstantImpl();
 }
 
 const char *ConstantExpr::getOpcodeName() const {
@@ -2496,7 +2509,7 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
   return *Entry = new ConstantDataVector(Ty, Slot.first().data());
 }
 
-void ConstantDataSequential::destroyConstant() {
+void ConstantDataSequential::destroyConstantImpl() {
   // Remove the constant from the StringMap.
   StringMap<ConstantDataSequential*> &CDSConstants = 
     getType()->getContext().pImpl->CDSConstants;
@@ -2531,9 +2544,6 @@ void ConstantDataSequential::destroyConstant() {
   // If we were part of a list, make sure that we don't delete the list that is
   // still owned by the uniquing map.
   Next = nullptr;
-
-  // Finally, actually delete it.
-  destroyConstantImpl();
 }
 
 /// get() constructors - Return a constant with array type with an element
@@ -2814,20 +2824,36 @@ Constant *ConstantDataVector::getSplatValue() const {
 }
 
 //===----------------------------------------------------------------------===//
-//                replaceUsesOfWithOnConstant implementations
+//                handleOperandChange implementations
 
-/// replaceUsesOfWithOnConstant - Update this constant array to change uses of
+/// Update this constant array to change uses of
 /// 'From' to be uses of 'To'.  This must update the uniquing data structures
 /// etc.
 ///
 /// Note that we intentionally replace all uses of From with To here.  Consider
 /// a large array that uses 'From' 1000 times.  By handling this case all here,
-/// ConstantArray::replaceUsesOfWithOnConstant is only invoked once, and that
+/// ConstantArray::handleOperandChange is only invoked once, and that
 /// single invocation handles all 1000 uses.  Handling them one at a time would
 /// work, but would be really slow because it would have to unique each updated
 /// array instance.
 ///
-void Constant::replaceUsesOfWithOnConstantImpl(Constant *Replacement) {
+void Constant::handleOperandChange(Value *From, Value *To, Use *U) {
+  Value *Replacement = nullptr;
+  switch (getValueID()) {
+  default:
+    llvm_unreachable("Not a constant!");
+#define HANDLE_CONSTANT(Name)                                                  \
+  case Value::Name##Val:                                                       \
+    Replacement = cast<Name>(this)->handleOperandChangeImpl(From, To, U);      \
+    break;
+#include "llvm/IR/Value.def"
+  }
+
+  // If handleOperandChangeImpl returned nullptr, then it handled
+  // replacing itself and we don't want to delete or replace anything else here.
+  if (!Replacement)
+    return;
+
   // I do need to replace this with an existing value.
   assert(Replacement != this && "I didn't contain From!");
 
@@ -2838,8 +2864,34 @@ void Constant::replaceUsesOfWithOnConstantImpl(Constant *Replacement) {
   destroyConstant();
 }
 
-void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
-                                                Use *U) {
+Value *ConstantInt::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
+  llvm_unreachable("Unsupported class for handleOperandChange()!");
+}
+
+Value *ConstantFP::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
+  llvm_unreachable("Unsupported class for handleOperandChange()!");
+}
+
+Value *UndefValue::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
+  llvm_unreachable("Unsupported class for handleOperandChange()!");
+}
+
+Value *ConstantPointerNull::handleOperandChangeImpl(Value *From, Value *To,
+                                                    Use *U) {
+  llvm_unreachable("Unsupported class for handleOperandChange()!");
+}
+
+Value *ConstantAggregateZero::handleOperandChangeImpl(Value *From, Value *To,
+                                                      Use *U) {
+  llvm_unreachable("Unsupported class for handleOperandChange()!");
+}
+
+Value *ConstantDataSequential::handleOperandChangeImpl(Value *From, Value *To,
+                                                       Use *U) {
+  llvm_unreachable("Unsupported class for handleOperandChange()!");
+}
+
+Value *ConstantArray::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
   assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
   Constant *ToC = cast<Constant>(To);
 
@@ -2863,29 +2915,22 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
     AllSame &= Val == ToC;
   }
 
-  if (AllSame && ToC->isNullValue()) {
-    replaceUsesOfWithOnConstantImpl(ConstantAggregateZero::get(getType()));
-    return;
-  }
-  if (AllSame && isa<UndefValue>(ToC)) {
-    replaceUsesOfWithOnConstantImpl(UndefValue::get(getType()));
-    return;
-  }
+  if (AllSame && ToC->isNullValue())
+    return ConstantAggregateZero::get(getType());
+
+  if (AllSame && isa<UndefValue>(ToC))
+    return UndefValue::get(getType());
 
   // Check for any other type of constant-folding.
-  if (Constant *C = getImpl(getType(), Values)) {
-    replaceUsesOfWithOnConstantImpl(C);
-    return;
-  }
+  if (Constant *C = getImpl(getType(), Values))
+    return C;
 
   // Update to the new value.
-  if (Constant *C = getContext().pImpl->ArrayConstants.replaceOperandsInPlace(
-          Values, this, From, ToC, NumUpdated, U - OperandList))
-    replaceUsesOfWithOnConstantImpl(C);
+  return getContext().pImpl->ArrayConstants.replaceOperandsInPlace(
+      Values, this, From, ToC, NumUpdated, U - OperandList);
 }
 
-void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
-                                                 Use *U) {
+Value *ConstantStruct::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
   assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
   Constant *ToC = cast<Constant>(To);
 
@@ -2920,23 +2965,18 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
   }
   Values[OperandToUpdate] = ToC;
 
-  if (isAllZeros) {
-    replaceUsesOfWithOnConstantImpl(ConstantAggregateZero::get(getType()));
-    return;
-  }
-  if (isAllUndef) {
-    replaceUsesOfWithOnConstantImpl(UndefValue::get(getType()));
-    return;
-  }
+  if (isAllZeros)
+    return ConstantAggregateZero::get(getType());
+
+  if (isAllUndef)
+    return UndefValue::get(getType());
 
   // Update to the new value.
-  if (Constant *C = getContext().pImpl->StructConstants.replaceOperandsInPlace(
-          Values, this, From, ToC))
-    replaceUsesOfWithOnConstantImpl(C);
+  return getContext().pImpl->StructConstants.replaceOperandsInPlace(
+      Values, this, From, ToC);
 }
 
-void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
-                                                 Use *U) {
+Value *ConstantVector::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
   assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
   Constant *ToC = cast<Constant>(To);
 
@@ -2952,20 +2992,16 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
     Values.push_back(Val);
   }
 
-  if (Constant *C = getImpl(Values)) {
-    replaceUsesOfWithOnConstantImpl(C);
-    return;
-  }
+  if (Constant *C = getImpl(Values))
+    return C;
 
   // Update to the new value.
   Use *OperandList = getOperandList();
-  if (Constant *C = getContext().pImpl->VectorConstants.replaceOperandsInPlace(
-          Values, this, From, ToC, NumUpdated, U - OperandList))
-    replaceUsesOfWithOnConstantImpl(C);
+  return getContext().pImpl->VectorConstants.replaceOperandsInPlace(
+      Values, this, From, ToC, NumUpdated, U - OperandList);
 }
 
-void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
-                                               Use *U) {
+Value *ConstantExpr::handleOperandChangeImpl(Value *From, Value *ToV, Use *U) {
   assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
   Constant *To = cast<Constant>(ToV);
 
@@ -2981,16 +3017,13 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
   }
   assert(NumUpdated && "I didn't contain From!");
 
-  if (Constant *C = getWithOperands(NewOps, getType(), true)) {
-    replaceUsesOfWithOnConstantImpl(C);
-    return;
-  }
+  if (Constant *C = getWithOperands(NewOps, getType(), true))
+    return C;
 
   // Update to the new value.
   Use *OperandList = getOperandList();
-  if (Constant *C = getContext().pImpl->ExprConstants.replaceOperandsInPlace(
-          NewOps, this, From, To, NumUpdated, U - OperandList))
-    replaceUsesOfWithOnConstantImpl(C);
+  return getContext().pImpl->ExprConstants.replaceOperandsInPlace(
+      NewOps, this, From, To, NumUpdated, U - OperandList);
 }
 
 Instruction *ConstantExpr::getAsInstruction() {
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index c41d844..6a3ff0e8 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -55,11 +55,10 @@ public:
     return HeaderBuilder().concat("0x" + Twine::utohexstr(Tag));
   }
 };
-} // namespace
+}
 
 DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes)
-    : M(m), VMContext(M.getContext()), TempEnumTypes(nullptr),
-      TempRetainTypes(nullptr), TempSubprograms(nullptr), TempGVs(nullptr),
+  : M(m), VMContext(M.getContext()), CUNode(nullptr),
       DeclareFn(nullptr), ValueFn(nullptr),
       AllowUnresolvedNodes(AllowUnresolvedNodes) {}
 
@@ -74,35 +73,37 @@ void DIBuilder::trackIfUnresolved(MDNode *N) {
 }
 
 void DIBuilder::finalize() {
-  TempEnumTypes->replaceAllUsesWith(MDTuple::get(VMContext, AllEnumTypes));
-
-  SmallVector<Metadata *, 16> RetainValues;
-  // Declarations and definitions of the same type may be retained. Some
-  // clients RAUW these pairs, leaving duplicates in the retained types
-  // list. Use a set to remove the duplicates while we transform the
-  // TrackingVHs back into Values.
-  SmallPtrSet<Metadata *, 16> RetainSet;
-  for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
-    if (RetainSet.insert(AllRetainTypes[I]).second)
-      RetainValues.push_back(AllRetainTypes[I]);
-  TempRetainTypes->replaceAllUsesWith(MDTuple::get(VMContext, RetainValues));
-
-  DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
-  TempSubprograms->replaceAllUsesWith(SPs.get());
-  for (auto *SP : SPs) {
-    if (MDTuple *Temp = SP->getVariables().get()) {
-      const auto &PV = PreservedVariables.lookup(SP);
-      SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end());
-      DINodeArray AV = getOrCreateArray(Variables);
-      TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
+  if (CUNode) {
+    CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes));
+
+    SmallVector<Metadata *, 16> RetainValues;
+    // Declarations and definitions of the same type may be retained. Some
+    // clients RAUW these pairs, leaving duplicates in the retained types
+    // list. Use a set to remove the duplicates while we transform the
+    // TrackingVHs back into Values.
+    SmallPtrSet<Metadata *, 16> RetainSet;
+    for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++)
+      if (RetainSet.insert(AllRetainTypes[I]).second)
+        RetainValues.push_back(AllRetainTypes[I]);
+    CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues));
+
+    DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
+    CUNode->replaceSubprograms(SPs.get());
+    for (auto *SP : SPs) {
+      if (MDTuple *Temp = SP->getVariables().get()) {
+        const auto &PV = PreservedVariables.lookup(SP);
+        SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end());
+        DINodeArray AV = getOrCreateArray(Variables);
+        TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
+      }
     }
-  }
 
-  TempGVs->replaceAllUsesWith(MDTuple::get(VMContext, AllGVs));
+    CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs));
 
-  TempImportedModules->replaceAllUsesWith(MDTuple::get(
-      VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(),
-                                             AllImportedModules.end())));
+    CUNode->replaceImportedEntities(MDTuple::get(
+        VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(),
+                                               AllImportedModules.end())));
+  }
 
   // Now that all temp nodes have been replaced or deleted, resolve remaining
   // cycles.
@@ -133,21 +134,11 @@ DICompileUnit *DIBuilder::createCompileUnit(
   assert(!Filename.empty() &&
          "Unable to create compile unit without filename");
 
-  // TODO: Once we make DICompileUnit distinct, stop using temporaries here
-  // (just start with operands assigned to nullptr).
-  TempEnumTypes = MDTuple::getTemporary(VMContext, None);
-  TempRetainTypes = MDTuple::getTemporary(VMContext, None);
-  TempSubprograms = MDTuple::getTemporary(VMContext, None);
-  TempGVs = MDTuple::getTemporary(VMContext, None);
-  TempImportedModules = MDTuple::getTemporary(VMContext, None);
-
-  // TODO: Switch to getDistinct().  We never want to merge compile units based
-  // on contents.
-  DICompileUnit *CUNode = DICompileUnit::get(
+  assert(!CUNode && "Can only make one compile unit per DIBuilder instance");
+  CUNode = DICompileUnit::getDistinct(
       VMContext, Lang, DIFile::get(VMContext, Filename, Directory), Producer,
-      isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes.get(),
-      TempRetainTypes.get(), TempSubprograms.get(), TempGVs.get(),
-      TempImportedModules.get(), DWOId);
+      isOptimized, Flags, RunTimeVer, SplitName, Kind, nullptr,
+      nullptr, nullptr, nullptr, nullptr, DWOId);
 
   // Create a named metadata so that it is easier to find cu in a module.
   // Note that we only generate this when the caller wants to actually
@@ -186,6 +177,12 @@ DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
                                 Context, NS, Line, StringRef(), AllImportedModules);
 }
 
+DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M,
+                                                  unsigned Line) {
+  return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
+                                Context, M, Line, StringRef(), AllImportedModules);
+}
+
 DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context,
                                                        DINode *Decl,
                                                        unsigned Line,
@@ -703,6 +700,14 @@ DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
                           LineNo);
 }
 
+DIModule *DIBuilder::createModule(DIScope *Scope, StringRef Name,
+                                  StringRef ConfigurationMacros,
+                                  StringRef IncludePath,
+                                  StringRef ISysRoot) {
+ return DIModule::get(VMContext, getNonCompileUnitScope(Scope), Name,
+                      ConfigurationMacros, IncludePath, ISysRoot);
+}
+
 DILexicalBlockFile *DIBuilder::createLexicalBlockFile(DIScope *Scope,
                                                       DIFile *File,
                                                       unsigned Discriminator) {
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 2e161c2..9646d1a 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -145,6 +145,8 @@ void DebugInfoFinder::processModule(const Module &M) {
           processSubprogram(SP);
         else if (auto *NS = dyn_cast<DINamespace>(Entity))
           processScope(NS->getScope());
+        else if (auto *M = dyn_cast<DIModule>(Entity))
+          processScope(M->getScope());
       }
     }
   }
@@ -201,6 +203,8 @@ void DebugInfoFinder::processScope(DIScope *Scope) {
     processScope(LB->getScope());
   } else if (auto *NS = dyn_cast<DINamespace>(Scope)) {
     processScope(NS->getScope());
+  } else if (auto *M = dyn_cast<DIModule>(Scope)) {
+    processScope(M->getScope());
   }
 }
 
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 8b9857d..5e01748 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -138,6 +138,9 @@ DIScopeRef DIScope::getScope() const {
   if (auto *NS = dyn_cast<DINamespace>(this))
     return DIScopeRef(NS->getScope());
 
+  if (auto *M = dyn_cast<DIModule>(this))
+    return DIScopeRef(M->getScope());
+
   assert((isa<DIFile>(this) || isa<DICompileUnit>(this)) &&
          "Unhandled type of scope.");
   return nullptr;
@@ -150,6 +153,8 @@ StringRef DIScope::getName() const {
     return SP->getName();
   if (auto *NS = dyn_cast<DINamespace>(this))
     return NS->getName();
+  if (auto *M = dyn_cast<DIModule>(this))
+    return M->getName();
   assert((isa<DILexicalBlockBase>(this) || isa<DIFile>(this) ||
           isa<DICompileUnit>(this)) &&
          "Unhandled type of scope.");
@@ -410,6 +415,18 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
   DEFINE_GETIMPL_STORE(DINamespace, (Line), Ops);
 }
 
+DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope,
+                            MDString *Name, MDString *ConfigurationMacros,
+                            MDString *IncludePath, MDString *ISysRoot,
+                            StorageType Storage, bool ShouldCreate) {
+  assert(isCanonical(Name) && "Expected canonical MDString");
+  DEFINE_GETIMPL_LOOKUP(DIModule,
+    (Scope, getString(Name), getString(ConfigurationMacros),
+     getString(IncludePath), getString(ISysRoot)));
+  Metadata *Ops[] = {Scope, Name, ConfigurationMacros, IncludePath, ISysRoot};
+  DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIModule, Ops);
+}
+
 DITemplateTypeParameter *DITemplateTypeParameter::getImpl(LLVMContext &Context,
                                                           MDString *Name,
                                                           Metadata *Type,
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index 5de9289..b8f77ed 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -84,7 +84,7 @@ PassRemarksAnalysis(
         "the given regular expression"),
     cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired,
     cl::ZeroOrMore);
-} // namespace
+}
 
 int llvm::getNextAvailablePluginDiagnosticKind() {
   static std::atomic<int> PluginKindID(DK_FirstPluginKind);
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index a0a3db4..6ed58913 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -496,7 +496,7 @@ public:
     OS << format("%5u:", LineNum) << Line << "\n";
   }
 };
-} // namespace
+}
 
 /// Convert a path to a gcov filename. If PreservePaths is true, this
 /// translates "/" to "#", ".." to "^", and drops ".", to match gcov.
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index 79a458c..1d02826 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -42,10 +42,14 @@ void GlobalValue::dematerialize() {
   getParent()->dematerialize(this);
 }
 
-/// Override destroyConstant to make sure it doesn't get called on
+/// Override destroyConstantImpl to make sure it doesn't get called on
 /// GlobalValue's because they shouldn't be treated like other constants.
-void GlobalValue::destroyConstant() {
-  llvm_unreachable("You can't GV->destroyConstant()!");
+void GlobalValue::destroyConstantImpl() {
+  llvm_unreachable("You can't GV->destroyConstantImpl()!");
+}
+
+Value *GlobalValue::handleOperandChangeImpl(Value *From, Value *To, Use *U) {
+  llvm_unreachable("Unsupported class for handleOperandChange()!");
 }
 
 /// copyAttributesFrom - copy all additional attributes (those not needed to
@@ -191,26 +195,6 @@ void GlobalVariable::eraseFromParent() {
   getParent()->getGlobalList().erase(this);
 }
 
-void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
-                                                 Use *U) {
-  // If you call this, then you better know this GVar has a constant
-  // initializer worth replacing. Enforce that here.
-  assert(getNumOperands() == 1 &&
-         "Attempt to replace uses of Constants on a GVar with no initializer");
-
-  // And, since you know it has an initializer, the From value better be
-  // the initializer :)
-  assert(getOperand(0) == From &&
-         "Attempt to replace wrong constant initializer in GVar");
-
-  // And, you better have a constant for the replacement value
-  assert(isa<Constant>(To) &&
-         "Attempt to replace GVar initializer with non-constant");
-
-  // Okay, preconditions out of the way, replace the constant initializer.
-  this->setOperand(0, cast<Constant>(To));
-}
-
 void GlobalVariable::setInitializer(Constant *InitVal) {
   if (!InitVal) {
     if (hasInitializer()) {
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index 03e7d55..c1ac336 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -103,7 +103,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 char PrintModulePassWrapper::ID = 0;
 INITIALIZE_PASS(PrintModulePassWrapper, "print-module",
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index af42638..c57ba16 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -534,8 +534,23 @@ bool Instruction::isNilpotent(unsigned Opcode) {
   return Opcode == Xor;
 }
 
+Instruction *Instruction::cloneImpl() const {
+  llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
+}
+
 Instruction *Instruction::clone() const {
-  Instruction *New = clone_impl();
+  Instruction *New = nullptr;
+  switch (getOpcode()) {
+  default:
+    llvm_unreachable("Unhandled Opcode.");
+#define HANDLE_INST(num, opc, clas)                                            \
+  case Instruction::opc:                                                       \
+    New = cast<clas>(this)->cloneImpl();                                       \
+    break;
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_INST
+  }
+
   New->SubclassOptionalData = SubclassOptionalData;
   if (!hasMetadata())
     return New;
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index d45b511..86c921a 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -292,6 +292,12 @@ void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) {
   setAttributes(PAL);
 }
 
+void CallInst::addAttribute(unsigned i, StringRef Kind, StringRef Value) {
+  AttributeSet PAL = getAttributes();
+  PAL = PAL.addAttribute(getContext(), i, Kind, Value);
+  setAttributes(PAL);
+}
+
 void CallInst::removeAttribute(unsigned i, Attribute attr) {
   AttributeSet PAL = getAttributes();
   AttrBuilder B(attr);
@@ -313,14 +319,6 @@ void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
   setAttributes(PAL);
 }
 
-bool CallInst::hasFnAttrImpl(Attribute::AttrKind A) const {
-  if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
-    return true;
-  if (const Function *F = getCalledFunction())
-    return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
-  return false;
-}
-
 bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
   if (AttributeList.hasAttribute(i, A))
     return true;
@@ -3448,55 +3446,55 @@ void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
 }
 
 //===----------------------------------------------------------------------===//
-//                           clone_impl() implementations
+//                           cloneImpl() implementations
 //===----------------------------------------------------------------------===//
 
 // Define these methods here so vtables don't get emitted into every translation
 // unit that uses these classes.
 
-GetElementPtrInst *GetElementPtrInst::clone_impl() const {
+GetElementPtrInst *GetElementPtrInst::cloneImpl() const {
   return new (getNumOperands()) GetElementPtrInst(*this);
 }
 
-BinaryOperator *BinaryOperator::clone_impl() const {
+BinaryOperator *BinaryOperator::cloneImpl() const {
   return Create(getOpcode(), Op<0>(), Op<1>());
 }
 
-FCmpInst* FCmpInst::clone_impl() const {
+FCmpInst *FCmpInst::cloneImpl() const {
   return new FCmpInst(getPredicate(), Op<0>(), Op<1>());
 }
 
-ICmpInst* ICmpInst::clone_impl() const {
+ICmpInst *ICmpInst::cloneImpl() const {
   return new ICmpInst(getPredicate(), Op<0>(), Op<1>());
 }
 
-ExtractValueInst *ExtractValueInst::clone_impl() const {
+ExtractValueInst *ExtractValueInst::cloneImpl() const {
   return new ExtractValueInst(*this);
 }
 
-InsertValueInst *InsertValueInst::clone_impl() const {
+InsertValueInst *InsertValueInst::cloneImpl() const {
   return new InsertValueInst(*this);
 }
 
-AllocaInst *AllocaInst::clone_impl() const {
+AllocaInst *AllocaInst::cloneImpl() const {
   AllocaInst *Result = new AllocaInst(getAllocatedType(),
                                       (Value *)getOperand(0), getAlignment());
   Result->setUsedWithInAlloca(isUsedWithInAlloca());
   return Result;
 }
 
-LoadInst *LoadInst::clone_impl() const {
+LoadInst *LoadInst::cloneImpl() const {
   return new LoadInst(getOperand(0), Twine(), isVolatile(),
                       getAlignment(), getOrdering(), getSynchScope());
 }
 
-StoreInst *StoreInst::clone_impl() const {
+StoreInst *StoreInst::cloneImpl() const {
   return new StoreInst(getOperand(0), getOperand(1), isVolatile(),
                        getAlignment(), getOrdering(), getSynchScope());
   
 }
 
-AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
+AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const {
   AtomicCmpXchgInst *Result =
     new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2),
                           getSuccessOrdering(), getFailureOrdering(),
@@ -3506,7 +3504,7 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
   return Result;
 }
 
-AtomicRMWInst *AtomicRMWInst::clone_impl() const {
+AtomicRMWInst *AtomicRMWInst::cloneImpl() const {
   AtomicRMWInst *Result =
     new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1),
                       getOrdering(), getSynchScope());
@@ -3514,120 +3512,113 @@ AtomicRMWInst *AtomicRMWInst::clone_impl() const {
   return Result;
 }
 
-FenceInst *FenceInst::clone_impl() const {
+FenceInst *FenceInst::cloneImpl() const {
   return new FenceInst(getContext(), getOrdering(), getSynchScope());
 }
 
-TruncInst *TruncInst::clone_impl() const {
+TruncInst *TruncInst::cloneImpl() const {
   return new TruncInst(getOperand(0), getType());
 }
 
-ZExtInst *ZExtInst::clone_impl() const {
+ZExtInst *ZExtInst::cloneImpl() const {
   return new ZExtInst(getOperand(0), getType());
 }
 
-SExtInst *SExtInst::clone_impl() const {
+SExtInst *SExtInst::cloneImpl() const {
   return new SExtInst(getOperand(0), getType());
 }
 
-FPTruncInst *FPTruncInst::clone_impl() const {
+FPTruncInst *FPTruncInst::cloneImpl() const {
   return new FPTruncInst(getOperand(0), getType());
 }
 
-FPExtInst *FPExtInst::clone_impl() const {
+FPExtInst *FPExtInst::cloneImpl() const {
   return new FPExtInst(getOperand(0), getType());
 }
 
-UIToFPInst *UIToFPInst::clone_impl() const {
+UIToFPInst *UIToFPInst::cloneImpl() const {
   return new UIToFPInst(getOperand(0), getType());
 }
 
-SIToFPInst *SIToFPInst::clone_impl() const {
+SIToFPInst *SIToFPInst::cloneImpl() const {
   return new SIToFPInst(getOperand(0), getType());
 }
 
-FPToUIInst *FPToUIInst::clone_impl() const {
+FPToUIInst *FPToUIInst::cloneImpl() const {
   return new FPToUIInst(getOperand(0), getType());
 }
 
-FPToSIInst *FPToSIInst::clone_impl() const {
+FPToSIInst *FPToSIInst::cloneImpl() const {
   return new FPToSIInst(getOperand(0), getType());
 }
 
-PtrToIntInst *PtrToIntInst::clone_impl() const {
+PtrToIntInst *PtrToIntInst::cloneImpl() const {
   return new PtrToIntInst(getOperand(0), getType());
 }
 
-IntToPtrInst *IntToPtrInst::clone_impl() const {
+IntToPtrInst *IntToPtrInst::cloneImpl() const {
   return new IntToPtrInst(getOperand(0), getType());
 }
 
-BitCastInst *BitCastInst::clone_impl() const {
+BitCastInst *BitCastInst::cloneImpl() const {
   return new BitCastInst(getOperand(0), getType());
 }
 
-AddrSpaceCastInst *AddrSpaceCastInst::clone_impl() const {
+AddrSpaceCastInst *AddrSpaceCastInst::cloneImpl() const {
   return new AddrSpaceCastInst(getOperand(0), getType());
 }
 
-CallInst *CallInst::clone_impl() const {
+CallInst *CallInst::cloneImpl() const {
   return  new(getNumOperands()) CallInst(*this);
 }
 
-SelectInst *SelectInst::clone_impl() const {
+SelectInst *SelectInst::cloneImpl() const {
   return SelectInst::Create(getOperand(0), getOperand(1), getOperand(2));
 }
 
-VAArgInst *VAArgInst::clone_impl() const {
+VAArgInst *VAArgInst::cloneImpl() const {
   return new VAArgInst(getOperand(0), getType());
 }
 
-ExtractElementInst *ExtractElementInst::clone_impl() const {
+ExtractElementInst *ExtractElementInst::cloneImpl() const {
   return ExtractElementInst::Create(getOperand(0), getOperand(1));
 }
 
-InsertElementInst *InsertElementInst::clone_impl() const {
+InsertElementInst *InsertElementInst::cloneImpl() const {
   return InsertElementInst::Create(getOperand(0), getOperand(1), getOperand(2));
 }
 
-ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
+ShuffleVectorInst *ShuffleVectorInst::cloneImpl() const {
   return new ShuffleVectorInst(getOperand(0), getOperand(1), getOperand(2));
 }
 
-PHINode *PHINode::clone_impl() const {
-  return new PHINode(*this);
-}
+PHINode *PHINode::cloneImpl() const { return new PHINode(*this); }
 
-LandingPadInst *LandingPadInst::clone_impl() const {
+LandingPadInst *LandingPadInst::cloneImpl() const {
   return new LandingPadInst(*this);
 }
 
-ReturnInst *ReturnInst::clone_impl() const {
+ReturnInst *ReturnInst::cloneImpl() const {
   return new(getNumOperands()) ReturnInst(*this);
 }
 
-BranchInst *BranchInst::clone_impl() const {
+BranchInst *BranchInst::cloneImpl() const {
   return new(getNumOperands()) BranchInst(*this);
 }
 
-SwitchInst *SwitchInst::clone_impl() const {
-  return new SwitchInst(*this);
-}
+SwitchInst *SwitchInst::cloneImpl() const { return new SwitchInst(*this); }
 
-IndirectBrInst *IndirectBrInst::clone_impl() const {
+IndirectBrInst *IndirectBrInst::cloneImpl() const {
   return new IndirectBrInst(*this);
 }
 
-
-InvokeInst *InvokeInst::clone_impl() const {
+InvokeInst *InvokeInst::cloneImpl() const {
   return new(getNumOperands()) InvokeInst(*this);
 }
 
-ResumeInst *ResumeInst::clone_impl() const {
-  return new(1) ResumeInst(*this);
-}
+ResumeInst *ResumeInst::cloneImpl() const { return new (1) ResumeInst(*this); }
 
-UnreachableInst *UnreachableInst::clone_impl() const {
+UnreachableInst *UnreachableInst::cloneImpl() const {
   LLVMContext &Context = getContext();
   return new UnreachableInst(Context);
 }
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index d3d2fcd..1e20807 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -65,7 +65,7 @@ struct DropFirst {
     P.first->dropAllReferences();
   }
 };
-} // namespace
+}
 
 LLVMContextImpl::~LLVMContextImpl() {
   // NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -199,7 +199,7 @@ namespace llvm {
 /// does not cause MDOperand to be transparent.  In particular, a bare pointer
 /// doesn't get hashed before it's combined, whereas \a MDOperand would.
 static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); }
-} // namespace llvm
+}
 
 unsigned MDNodeOpsKey::calculateHash(MDNode *N, unsigned Offset) {
   unsigned Hash = hash_combine_range(N->op_begin() + Offset, N->op_end());
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 41a898b..cbbf11e 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -651,6 +651,35 @@ template <> struct MDNodeKeyImpl<DINamespace> {
   }
 };
 
+template <> struct MDNodeKeyImpl<DIModule> {
+  Metadata *Scope;
+  StringRef Name;
+  StringRef ConfigurationMacros;
+  StringRef IncludePath;
+  StringRef ISysRoot;
+  MDNodeKeyImpl(Metadata *Scope, StringRef Name,
+                StringRef ConfigurationMacros,
+                StringRef IncludePath,
+                StringRef ISysRoot)
+    : Scope(Scope), Name(Name), ConfigurationMacros(ConfigurationMacros),
+      IncludePath(IncludePath), ISysRoot(ISysRoot) {}
+  MDNodeKeyImpl(const DIModule *N)
+    : Scope(N->getRawScope()), Name(N->getName()),
+      ConfigurationMacros(N->getConfigurationMacros()),
+      IncludePath(N->getIncludePath()), ISysRoot(N->getISysRoot()) {}
+
+  bool isKeyOf(const DIModule *RHS) const {
+    return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+           ConfigurationMacros == RHS->getConfigurationMacros() &&
+           IncludePath == RHS->getIncludePath() &&
+           ISysRoot == RHS->getISysRoot();
+  }
+  unsigned getHashValue() const {
+    return hash_combine(Scope, Name,
+                        ConfigurationMacros, IncludePath, ISysRoot);
+  }
+};
+
 template <> struct MDNodeKeyImpl<DITemplateTypeParameter> {
   StringRef Name;
   Metadata *Type;
@@ -1025,6 +1054,6 @@ public:
   void dropTriviallyDeadConstantArrays();
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index 881d780..27d98a2 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -275,8 +275,8 @@ public:
 void FunctionPassManagerImpl::anchor() {}
 
 char FunctionPassManagerImpl::ID = 0;
-} // namespace legacy
-} // namespace llvm
+} // End of legacy namespace
+} // End of llvm namespace
 
 namespace {
 //===----------------------------------------------------------------------===//
@@ -439,8 +439,8 @@ public:
 void PassManagerImpl::anchor() {}
 
 char PassManagerImpl::ID = 0;
-} // namespace legacy
-} // namespace llvm
+} // End of legacy namespace
+} // End of llvm namespace
 
 namespace {
 
@@ -486,7 +486,7 @@ public:
   }
 };
 
-} // namespace
+} // End of anon namespace
 
 static TimingInfo *TheTimeInfo;
 
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index a0e1b25..016cb9e 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -17,12 +17,21 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
-                              Mangler::ManglerPrefixTy PrefixTy,
-                              const DataLayout &DL, char Prefix) {
+namespace {
+enum ManglerPrefixTy {
+  Default,      ///< Emit default string before each symbol.
+  Private,      ///< Emit "private" prefix before each symbol.
+  LinkerPrivate ///< Emit "linker private" prefix before each symbol.
+};
+}
+
+static void getNameWithPrefixImpl(raw_ostream &OS, const Twine &GVName,
+                                  ManglerPrefixTy PrefixTy,
+                                  const DataLayout &DL, char Prefix) {
   SmallString<256> TmpData;
   StringRef Name = GVName.toStringRef(TmpData);
   assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
@@ -34,9 +43,9 @@ static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
     return;
   }
 
-  if (PrefixTy == Mangler::Private)
+  if (PrefixTy == Private)
     OS << DL.getPrivateGlobalPrefix();
-  else if (PrefixTy == Mangler::LinkerPrivate)
+  else if (PrefixTy == LinkerPrivate)
     OS << DL.getLinkerPrivateGlobalPrefix();
 
   if (Prefix != '\0')
@@ -46,17 +55,23 @@ static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
   OS << Name;
 }
 
+static void getNameWithPrefixImpl(raw_ostream &OS, const Twine &GVName,
+                                  const DataLayout &DL,
+                                  ManglerPrefixTy PrefixTy) {
+  char Prefix = DL.getGlobalPrefix();
+  return getNameWithPrefixImpl(OS, GVName, PrefixTy, DL, Prefix);
+}
+
 void Mangler::getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
-                                ManglerPrefixTy PrefixTy) const {
-  char Prefix = DL->getGlobalPrefix();
-  return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, Prefix);
+                                const DataLayout &DL) {
+  return getNameWithPrefixImpl(OS, GVName, DL, Default);
 }
 
 void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
-                                const Twine &GVName,
-                                ManglerPrefixTy PrefixTy) const {
+                                const Twine &GVName, const DataLayout &DL) {
   raw_svector_ostream OS(OutName);
-  return getNameWithPrefix(OS, GVName, PrefixTy);
+  char Prefix = DL.getGlobalPrefix();
+  return getNameWithPrefixImpl(OS, GVName, Default, DL, Prefix);
 }
 
 static bool hasByteCountSuffix(CallingConv::ID CC) {
@@ -92,14 +107,15 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F,
 
 void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
                                 bool CannotUsePrivateLabel) const {
-  ManglerPrefixTy PrefixTy = Mangler::Default;
+  ManglerPrefixTy PrefixTy = Default;
   if (GV->hasPrivateLinkage()) {
     if (CannotUsePrivateLabel)
-      PrefixTy = Mangler::LinkerPrivate;
+      PrefixTy = LinkerPrivate;
     else
-      PrefixTy = Mangler::Private;
+      PrefixTy = Private;
   }
 
+  const DataLayout &DL = GV->getParent()->getDataLayout();
   if (!GV->hasName()) {
     // Get the ID for the global, assigning a new one if we haven't got one
     // already.
@@ -108,12 +124,12 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
       ID = NextAnonGlobalID++;
 
     // Must mangle the global into a unique ID.
-    getNameWithPrefix(OS, "__unnamed_" + Twine(ID), PrefixTy);
+    getNameWithPrefixImpl(OS, "__unnamed_" + Twine(ID), DL, PrefixTy);
     return;
   }
 
   StringRef Name = GV->getName();
-  char Prefix = DL->getGlobalPrefix();
+  char Prefix = DL.getGlobalPrefix();
 
   // Mangle functions with Microsoft calling conventions specially.  Only do
   // this mangling for x86_64 vectorcall and 32-bit x86.
@@ -122,7 +138,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
     MSFunc = nullptr; // Don't mangle when \01 is present.
   CallingConv::ID CC =
       MSFunc ? MSFunc->getCallingConv() : (unsigned)CallingConv::C;
-  if (!DL->hasMicrosoftFastStdCallMangling() &&
+  if (!DL.hasMicrosoftFastStdCallMangling() &&
       CC != CallingConv::X86_VectorCall)
     MSFunc = nullptr;
   if (MSFunc) {
@@ -132,7 +148,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
       Prefix = '\0'; // vectorcall functions have no prefix.
   }
 
-  getNameWithPrefixx(OS, Name, PrefixTy, *DL, Prefix);
+  getNameWithPrefixImpl(OS, Name, PrefixTy, DL, Prefix);
 
   if (!MSFunc)
     return;
@@ -147,7 +163,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
       // "Pure" variadic functions do not receive @0 suffix.
       (!FT->isVarArg() || FT->getNumParams() == 0 ||
        (FT->getNumParams() == 1 && MSFunc->hasStructRetAttr())))
-    addByteCountSuffix(OS, MSFunc, *DL);
+    addByteCountSuffix(OS, MSFunc, DL);
 }
 
 void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
diff --git a/lib/IR/Operator.cpp b/lib/IR/Operator.cpp
index bea1f80..77dc680 100644
--- a/lib/IR/Operator.cpp
+++ b/lib/IR/Operator.cpp
@@ -41,4 +41,4 @@ bool GEPOperator::accumulateConstantOffset(const DataLayout &DL,
   }
   return true;
 }
-} // namespace llvm
+}
diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp
index 2fa1e7c..df45460 100644
--- a/lib/IR/Pass.cpp
+++ b/lib/IR/Pass.cpp
@@ -249,7 +249,7 @@ namespace {
         CFGOnlyList.push_back(P->getTypeInfo());
     }
   };
-} // namespace
+}
 
 // setPreservesCFG - This function should be called to by the pass, iff they do
 // not:
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index f94def7..a18f982 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -113,6 +113,6 @@ void SymbolTableListTraits<ValueSubClass,ItemParentClass>
   }
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp
index fd06fdb..cae845d 100644
--- a/lib/IR/Use.cpp
+++ b/lib/IR/Use.cpp
@@ -124,4 +124,4 @@ const Use *Use::getImpliedUser() const {
   }
 }
 
-} // namespace llvm
+} // End llvm namespace
diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp
index 21f4849..522722d 100644
--- a/lib/IR/User.cpp
+++ b/lib/IR/User.cpp
@@ -144,4 +144,4 @@ Operator::~Operator() {
   llvm_unreachable("should never destroy an Operator");
 }
 
-} // namespace llvm
+} // End llvm namespace
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index eb5c225..78d1adb 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -369,7 +369,7 @@ void Value::replaceAllUsesWith(Value *New) {
     // constant because they are uniqued.
     if (auto *C = dyn_cast<Constant>(U.getUser())) {
       if (!isa<GlobalValue>(C)) {
-        C->replaceUsesOfWithOnConstant(this, New, &U);
+        C->handleOperandChange(this, New, &U);
         continue;
       }
     }
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 19b11b4..3c61165 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -102,6 +102,9 @@ private:
       OS << '\n';
     }
   }
+  void Write(ImmutableCallSite CS) {
+    Write(CS.getInstruction());
+  }
 
   void Write(const Metadata *MD) {
     if (!MD)
@@ -367,7 +370,7 @@ private:
   void visitSelectInst(SelectInst &SI);
   void visitUserOp1(Instruction &I);
   void visitUserOp2(Instruction &I) { visitUserOp1(I); }
-  void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+  void visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS);
   template <class DbgIntrinsicTy>
   void visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII);
   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
@@ -1014,6 +1017,11 @@ void Verifier::visitDINamespace(const DINamespace &N) {
     Assert(isa<DIScope>(S), "invalid scope ref", &N, S);
 }
 
+void Verifier::visitDIModule(const DIModule &N) {
+  Assert(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N);
+  Assert(!N.getName().empty(), "anonymous module", &N);
+}
+
 void Verifier::visitDITemplateParameter(const DITemplateParameter &N) {
   Assert(isTypeRef(N, N.getType()), "invalid type ref", &N, N.getType());
 }
@@ -2289,6 +2297,10 @@ void Verifier::VerifyCallSite(CallSite CS) {
              "Function has metadata parameter but isn't an intrinsic", I);
   }
 
+  if (Function *F = CS.getCalledFunction())
+    if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+      visitIntrinsicCallSite(ID, CS);
+
   visitInstruction(*I);
 }
 
@@ -2384,10 +2396,6 @@ void Verifier::visitCallInst(CallInst &CI) {
 
   if (CI.isMustTailCall())
     verifyMustTailCall(CI);
-
-  if (Function *F = CI.getCalledFunction())
-    if (Intrinsic::ID ID = F->getIntrinsicID())
-      visitIntrinsicFunctionCall(ID, CI);
 }
 
 void Verifier::visitInvokeInst(InvokeInst &II) {
@@ -2398,13 +2406,6 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
   Assert(II.getUnwindDest()->isLandingPad(),
          "The unwind destination does not have a landingpad instruction!", &II);
 
-  if (Function *F = II.getCalledFunction())
-    // TODO: Ideally we should use visitIntrinsicFunction here. But it uses
-    //       CallInst as an input parameter. It not woth updating this whole
-    //       function only to support statepoint verification.
-    if (F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint)
-      VerifyStatepoint(ImmutableCallSite(&II));
-
   visitTerminatorInst(II);
 }
 
@@ -3144,10 +3145,9 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
   return true;
 }
 
-/// visitIntrinsicFunction - Allow intrinsics to be verified in different ways.
-///
-void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
-  Function *IF = CI.getCalledFunction();
+/// Allow intrinsics to be verified in different ways.
+void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
+  Function *IF = CS.getCalledFunction();
   Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!",
          IF);
 
@@ -3191,41 +3191,41 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
 
   // If the intrinsic takes MDNode arguments, verify that they are either global
   // or are local to *this* function.
-  for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
-    if (auto *MD = dyn_cast<MetadataAsValue>(CI.getArgOperand(i)))
-      visitMetadataAsValue(*MD, CI.getParent()->getParent());
+  for (Value *V : CS.args()) 
+    if (auto *MD = dyn_cast<MetadataAsValue>(V))
+      visitMetadataAsValue(*MD, CS.getCaller());
 
   switch (ID) {
   default:
     break;
   case Intrinsic::ctlz:  // llvm.ctlz
   case Intrinsic::cttz:  // llvm.cttz
-    Assert(isa<ConstantInt>(CI.getArgOperand(1)),
+    Assert(isa<ConstantInt>(CS.getArgOperand(1)),
            "is_zero_undef argument of bit counting intrinsics must be a "
            "constant int",
-           &CI);
+           CS);
     break;
   case Intrinsic::dbg_declare: // llvm.dbg.declare
-    Assert(isa<MetadataAsValue>(CI.getArgOperand(0)),
-           "invalid llvm.dbg.declare intrinsic call 1", &CI);
-    visitDbgIntrinsic("declare", cast<DbgDeclareInst>(CI));
+    Assert(isa<MetadataAsValue>(CS.getArgOperand(0)),
+           "invalid llvm.dbg.declare intrinsic call 1", CS);
+    visitDbgIntrinsic("declare", cast<DbgDeclareInst>(*CS.getInstruction()));
     break;
   case Intrinsic::dbg_value: // llvm.dbg.value
-    visitDbgIntrinsic("value", cast<DbgValueInst>(CI));
+    visitDbgIntrinsic("value", cast<DbgValueInst>(*CS.getInstruction()));
     break;
   case Intrinsic::memcpy:
   case Intrinsic::memmove:
   case Intrinsic::memset: {
-    ConstantInt *AlignCI = dyn_cast<ConstantInt>(CI.getArgOperand(3));
+    ConstantInt *AlignCI = dyn_cast<ConstantInt>(CS.getArgOperand(3));
     Assert(AlignCI,
            "alignment argument of memory intrinsics must be a constant int",
-           &CI);
+           CS);
     const APInt &AlignVal = AlignCI->getValue();
     Assert(AlignCI->isZero() || AlignVal.isPowerOf2(),
-           "alignment argument of memory intrinsics must be a power of 2", &CI);
-    Assert(isa<ConstantInt>(CI.getArgOperand(4)),
+           "alignment argument of memory intrinsics must be a power of 2", CS);
+    Assert(isa<ConstantInt>(CS.getArgOperand(4)),
            "isvolatile argument of memory intrinsics must be a constant int",
-           &CI);
+           CS);
     break;
   }
   case Intrinsic::gcroot:
@@ -3233,76 +3233,76 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
   case Intrinsic::gcread:
     if (ID == Intrinsic::gcroot) {
       AllocaInst *AI =
-        dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
-      Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
-      Assert(isa<Constant>(CI.getArgOperand(1)),
-             "llvm.gcroot parameter #2 must be a constant.", &CI);
+        dyn_cast<AllocaInst>(CS.getArgOperand(0)->stripPointerCasts());
+      Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", CS);
+      Assert(isa<Constant>(CS.getArgOperand(1)),
+             "llvm.gcroot parameter #2 must be a constant.", CS);
       if (!AI->getAllocatedType()->isPointerTy()) {
-        Assert(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+        Assert(!isa<ConstantPointerNull>(CS.getArgOperand(1)),
                "llvm.gcroot parameter #1 must either be a pointer alloca, "
                "or argument #2 must be a non-null constant.",
-               &CI);
+               CS);
       }
     }
 
-    Assert(CI.getParent()->getParent()->hasGC(),
-           "Enclosing function does not use GC.", &CI);
+    Assert(CS.getParent()->getParent()->hasGC(),
+           "Enclosing function does not use GC.", CS);
     break;
   case Intrinsic::init_trampoline:
-    Assert(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
+    Assert(isa<Function>(CS.getArgOperand(1)->stripPointerCasts()),
            "llvm.init_trampoline parameter #2 must resolve to a function.",
-           &CI);
+           CS);
     break;
   case Intrinsic::prefetch:
-    Assert(isa<ConstantInt>(CI.getArgOperand(1)) &&
-               isa<ConstantInt>(CI.getArgOperand(2)) &&
-               cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
-               cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
-           "invalid arguments to llvm.prefetch", &CI);
+    Assert(isa<ConstantInt>(CS.getArgOperand(1)) &&
+               isa<ConstantInt>(CS.getArgOperand(2)) &&
+               cast<ConstantInt>(CS.getArgOperand(1))->getZExtValue() < 2 &&
+               cast<ConstantInt>(CS.getArgOperand(2))->getZExtValue() < 4,
+           "invalid arguments to llvm.prefetch", CS);
     break;
   case Intrinsic::stackprotector:
-    Assert(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
-           "llvm.stackprotector parameter #2 must resolve to an alloca.", &CI);
+    Assert(isa<AllocaInst>(CS.getArgOperand(1)->stripPointerCasts()),
+           "llvm.stackprotector parameter #2 must resolve to an alloca.", CS);
     break;
   case Intrinsic::lifetime_start:
   case Intrinsic::lifetime_end:
   case Intrinsic::invariant_start:
-    Assert(isa<ConstantInt>(CI.getArgOperand(0)),
+    Assert(isa<ConstantInt>(CS.getArgOperand(0)),
            "size argument of memory use markers must be a constant integer",
-           &CI);
+           CS);
     break;
   case Intrinsic::invariant_end:
-    Assert(isa<ConstantInt>(CI.getArgOperand(1)),
-           "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+    Assert(isa<ConstantInt>(CS.getArgOperand(1)),
+           "llvm.invariant.end parameter #2 must be a constant integer", CS);
     break;
 
   case Intrinsic::frameescape: {
-    BasicBlock *BB = CI.getParent();
+    BasicBlock *BB = CS.getParent();
     Assert(BB == &BB->getParent()->front(),
-           "llvm.frameescape used outside of entry block", &CI);
+           "llvm.frameescape used outside of entry block", CS);
     Assert(!SawFrameEscape,
-           "multiple calls to llvm.frameescape in one function", &CI);
-    for (Value *Arg : CI.arg_operands()) {
+           "multiple calls to llvm.frameescape in one function", CS);
+    for (Value *Arg : CS.args()) {
       if (isa<ConstantPointerNull>(Arg))
         continue; // Null values are allowed as placeholders.
       auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
       Assert(AI && AI->isStaticAlloca(),
-             "llvm.frameescape only accepts static allocas", &CI);
+             "llvm.frameescape only accepts static allocas", CS);
     }
-    FrameEscapeInfo[BB->getParent()].first = CI.getNumArgOperands();
+    FrameEscapeInfo[BB->getParent()].first = CS.getNumArgOperands();
     SawFrameEscape = true;
     break;
   }
   case Intrinsic::framerecover: {
-    Value *FnArg = CI.getArgOperand(0)->stripPointerCasts();
+    Value *FnArg = CS.getArgOperand(0)->stripPointerCasts();
     Function *Fn = dyn_cast<Function>(FnArg);
     Assert(Fn && !Fn->isDeclaration(),
            "llvm.framerecover first "
            "argument must be function defined in this module",
-           &CI);
-    auto *IdxArg = dyn_cast<ConstantInt>(CI.getArgOperand(2));
+           CS);
+    auto *IdxArg = dyn_cast<ConstantInt>(CS.getArgOperand(2));
     Assert(IdxArg, "idx argument of llvm.framerecover must be a constant int",
-           &CI);
+           CS);
     auto &Entry = FrameEscapeInfo[Fn];
     Entry.second = unsigned(
         std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
@@ -3310,49 +3310,49 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
   }
 
   case Intrinsic::experimental_gc_statepoint:
-    Assert(!CI.isInlineAsm(),
-           "gc.statepoint support for inline assembly unimplemented", &CI);
-    Assert(CI.getParent()->getParent()->hasGC(),
-           "Enclosing function does not use GC.", &CI);
+    Assert(!CS.isInlineAsm(),
+           "gc.statepoint support for inline assembly unimplemented", CS);
+    Assert(CS.getParent()->getParent()->hasGC(),
+           "Enclosing function does not use GC.", CS);
 
-    VerifyStatepoint(ImmutableCallSite(&CI));
+    VerifyStatepoint(CS);
     break;
   case Intrinsic::experimental_gc_result_int:
   case Intrinsic::experimental_gc_result_float:
   case Intrinsic::experimental_gc_result_ptr:
   case Intrinsic::experimental_gc_result: {
-    Assert(CI.getParent()->getParent()->hasGC(),
-           "Enclosing function does not use GC.", &CI);
+    Assert(CS.getParent()->getParent()->hasGC(),
+           "Enclosing function does not use GC.", CS);
     // Are we tied to a statepoint properly?
-    CallSite StatepointCS(CI.getArgOperand(0));
+    CallSite StatepointCS(CS.getArgOperand(0));
     const Function *StatepointFn =
       StatepointCS.getInstruction() ? StatepointCS.getCalledFunction() : nullptr;
     Assert(StatepointFn && StatepointFn->isDeclaration() &&
                StatepointFn->getIntrinsicID() ==
                    Intrinsic::experimental_gc_statepoint,
-           "gc.result operand #1 must be from a statepoint", &CI,
-           CI.getArgOperand(0));
+           "gc.result operand #1 must be from a statepoint", CS,
+           CS.getArgOperand(0));
 
     // Assert that result type matches wrapped callee.
     const Value *Target = StatepointCS.getArgument(2);
     const PointerType *PT = cast<PointerType>(Target->getType());
     const FunctionType *TargetFuncType =
       cast<FunctionType>(PT->getElementType());
-    Assert(CI.getType() == TargetFuncType->getReturnType(),
-           "gc.result result type does not match wrapped callee", &CI);
+    Assert(CS.getType() == TargetFuncType->getReturnType(),
+           "gc.result result type does not match wrapped callee", CS);
     break;
   }
   case Intrinsic::experimental_gc_relocate: {
-    Assert(CI.getNumArgOperands() == 3, "wrong number of arguments", &CI);
+    Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS);
 
     // Check that this relocate is correctly tied to the statepoint
 
     // This is case for relocate on the unwinding path of an invoke statepoint
     if (ExtractValueInst *ExtractValue =
-          dyn_cast<ExtractValueInst>(CI.getArgOperand(0))) {
+          dyn_cast<ExtractValueInst>(CS.getArgOperand(0))) {
       Assert(isa<LandingPadInst>(ExtractValue->getAggregateOperand()),
              "gc relocate on unwind path incorrectly linked to the statepoint",
-             &CI);
+             CS);
 
       const BasicBlock *InvokeBB =
         ExtractValue->getParent()->getUniquePredecessor();
@@ -3370,32 +3370,32 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
       // In all other cases relocate should be tied to the statepoint directly.
       // This covers relocates on a normal return path of invoke statepoint and
       // relocates of a call statepoint
-      auto Token = CI.getArgOperand(0);
+      auto Token = CS.getArgOperand(0);
       Assert(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
-             "gc relocate is incorrectly tied to the statepoint", &CI, Token);
+             "gc relocate is incorrectly tied to the statepoint", CS, Token);
     }
 
     // Verify rest of the relocate arguments
 
-    GCRelocateOperands Ops(&CI);
+    GCRelocateOperands Ops(CS);
     ImmutableCallSite StatepointCS(Ops.getStatepoint());
 
     // Both the base and derived must be piped through the safepoint
-    Value* Base = CI.getArgOperand(1);
+    Value* Base = CS.getArgOperand(1);
     Assert(isa<ConstantInt>(Base),
-           "gc.relocate operand #2 must be integer offset", &CI);
+           "gc.relocate operand #2 must be integer offset", CS);
 
-    Value* Derived = CI.getArgOperand(2);
+    Value* Derived = CS.getArgOperand(2);
     Assert(isa<ConstantInt>(Derived),
-           "gc.relocate operand #3 must be integer offset", &CI);
+           "gc.relocate operand #3 must be integer offset", CS);
 
     const int BaseIndex = cast<ConstantInt>(Base)->getZExtValue();
     const int DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
     // Check the bounds
     Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCS.arg_size(),
-           "gc.relocate: statepoint base index out of bounds", &CI);
+           "gc.relocate: statepoint base index out of bounds", CS);
     Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCS.arg_size(),
-           "gc.relocate: statepoint derived index out of bounds", &CI);
+           "gc.relocate: statepoint derived index out of bounds", CS);
 
     // Check that BaseIndex and DerivedIndex fall within the 'gc parameters'
     // section of the statepoint's argument
@@ -3424,24 +3424,24 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
     Assert(GCParamArgsStart <= BaseIndex && BaseIndex < GCParamArgsEnd,
            "gc.relocate: statepoint base index doesn't fall within the "
            "'gc parameters' section of the statepoint call",
-           &CI);
+           CS);
     Assert(GCParamArgsStart <= DerivedIndex && DerivedIndex < GCParamArgsEnd,
            "gc.relocate: statepoint derived index doesn't fall within the "
            "'gc parameters' section of the statepoint call",
-           &CI);
+           CS);
 
     // Relocated value must be a pointer type, but gc_relocate does not need to return the
     // same pointer type as the relocated pointer. It can be casted to the correct type later
     // if it's desired. However, they must have the same address space.
-    GCRelocateOperands Operands(&CI);
+    GCRelocateOperands Operands(CS);
     Assert(Operands.getDerivedPtr()->getType()->isPointerTy(),
-           "gc.relocate: relocated value must be a gc pointer", &CI);
+           "gc.relocate: relocated value must be a gc pointer", CS);
 
     // gc_relocate return type must be a pointer type, and is verified earlier in
     // VerifyIntrinsicType().
-    Assert(cast<PointerType>(CI.getType())->getAddressSpace() ==
+    Assert(cast<PointerType>(CS.getType())->getAddressSpace() ==
            cast<PointerType>(Operands.getDerivedPtr()->getType())->getAddressSpace(),
-           "gc.relocate: relocating a pointer shouldn't change its address space", &CI);
+           "gc.relocate: relocating a pointer shouldn't change its address space", CS);
     break;
   }
   };
@@ -3691,7 +3691,7 @@ struct VerifierLegacyPass : public FunctionPass {
     AU.setPreservesAll();
   }
 };
-} // namespace
+}
 
 char VerifierLegacyPass::ID = 0;
 INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 716d66a..149ec6a 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -453,7 +453,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
   passes.add(createVerifierPass());
 
   // mark which symbols can not be internalized
-  Mangler Mangler(TargetMach->getDataLayout());
+  Mangler Mangler;
   std::vector<const char*> MustPreserveList;
   SmallPtrSet<GlobalValue*, 8> AsmUsed;
   std::vector<StringRef> Libcalls;
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index bbb3b6d..6131c31 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCExpr.h"
@@ -642,6 +643,8 @@ bool LTOModule::parseSymbols(std::string &errMsg) {
 
 /// parseMetadata - Parse metadata from the module
 void LTOModule::parseMetadata() {
+  raw_string_ostream OS(LinkerOpts);
+
   // Linker Options
   if (Metadata *Val = getModule().getModuleFlag("Linker Options")) {
     MDNode *LinkerOptions = cast<MDNode>(Val);
@@ -649,20 +652,19 @@ void LTOModule::parseMetadata() {
       MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
       for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
         MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
-        // FIXME: Make StringSet::insert match Self-Associative Container
-        // requirements, returning <iter,bool> rather than bool, and use that
-        // here.
-        StringRef Op =
-            _linkeropt_strings.insert(MDOption->getString()).first->first();
-        StringRef DepLibName =
-            _target->getObjFileLowering()->getDepLibFromLinkerOpt(Op);
-        if (!DepLibName.empty())
-          _deplibs.push_back(DepLibName.data());
-        else if (!Op.empty())
-          _linkeropts.push_back(Op.data());
+        OS << " " << MDOption->getString();
       }
     }
   }
 
+  // Globals
+  Mangler Mang;
+  for (const NameAndAttributes &Sym : _symbols) {
+    if (!Sym.symbol)
+      continue;
+    _target->getObjFileLowering()->emitLinkerFlagsForGlobal(OS, Sym.symbol,
+                                                            Mang);
+  }
+
   // Add other interesting metadata here.
 }
diff --git a/lib/LibDriver/LibDriver.cpp b/lib/LibDriver/LibDriver.cpp
index c9857b04..cb3278c 100644
--- a/lib/LibDriver/LibDriver.cpp
+++ b/lib/LibDriver/LibDriver.cpp
@@ -54,7 +54,7 @@ public:
   LibOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable), true) {}
 };
 
-} // namespace
+}
 
 static std::string getOutputPath(llvm::opt::InputArgList *Args) {
   if (auto *Arg = Args->getLastArg(OPT_out))
@@ -103,38 +103,37 @@ static Optional<std::string> findInputFile(StringRef File,
   return Optional<std::string>();
 }
 
-int llvm::libDriverMain(int Argc, const char **Argv) {
-  SmallVector<const char *, 20> NewArgv(Argv, Argv + Argc);
+int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) {
+  SmallVector<const char *, 20> NewArgs(ArgsArr.begin(), ArgsArr.end());
   BumpPtrAllocator Alloc;
   BumpPtrStringSaver Saver(Alloc);
-  cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgv);
-  Argv = &NewArgv[0];
-  Argc = static_cast<int>(NewArgv.size());
+  cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgs);
+  ArgsArr = NewArgs;
 
   LibOptTable Table;
   unsigned MissingIndex;
   unsigned MissingCount;
-  std::unique_ptr<llvm::opt::InputArgList> Args(
-      Table.ParseArgs(&Argv[1], &Argv[Argc], MissingIndex, MissingCount));
+  llvm::opt::InputArgList Args =
+      Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount);
   if (MissingCount) {
     llvm::errs() << "missing arg value for \""
-                 << Args->getArgString(MissingIndex)
-                 << "\", expected " << MissingCount
+                 << Args.getArgString(MissingIndex) << "\", expected "
+                 << MissingCount
                  << (MissingCount == 1 ? " argument.\n" : " arguments.\n");
     return 1;
   }
-  for (auto *Arg : Args->filtered(OPT_UNKNOWN))
+  for (auto *Arg : Args.filtered(OPT_UNKNOWN))
     llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
 
-  if (Args->filtered_begin(OPT_INPUT) == Args->filtered_end()) {
+  if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) {
     llvm::errs() << "no input files.\n";
     return 1;
   }
 
-  std::vector<StringRef> SearchPaths = getSearchPaths(Args.get(), Saver);
+  std::vector<StringRef> SearchPaths = getSearchPaths(&Args, Saver);
 
   std::vector<llvm::NewArchiveIterator> Members;
-  for (auto *Arg : Args->filtered(OPT_INPUT)) {
+  for (auto *Arg : Args.filtered(OPT_INPUT)) {
     Optional<std::string> Path = findInputFile(Arg->getValue(), SearchPaths);
     if (!Path.hasValue()) {
       llvm::errs() << Arg->getValue() << ": no such file or directory\n";
@@ -144,11 +143,11 @@ int llvm::libDriverMain(int Argc, const char **Argv) {
                          llvm::sys::path::filename(Arg->getValue()));
   }
 
-  std::pair<StringRef, std::error_code> Result = llvm::writeArchive(
-      getOutputPath(Args.get()), Members, /*WriteSymtab=*/true);
+  std::pair<StringRef, std::error_code> Result =
+      llvm::writeArchive(getOutputPath(&Args), Members, /*WriteSymtab=*/true);
   if (Result.second) {
     if (Result.first.empty())
-      Result.first = Argv[0];
+      Result.first = ArgsArr[0];
     llvm::errs() << Result.first << ": " << Result.second.message() << "\n";
     return 1;
   }
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index f80f6bc..f090680 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -99,7 +99,7 @@ private:
 
   bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
 };
-} // namespace
+}
 
 void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
   assert(SpeculativeTypes.empty());
@@ -507,7 +507,7 @@ private:
   void linkNamedMDNodes();
   void stripReplacedSubprograms();
 };
-} // namespace
+}
 
 /// The LLVM SymbolTable class autorenames globals that conflict in the symbol
 /// table. This is good for all clients except for us. Go through the trouble
@@ -1573,8 +1573,8 @@ bool ModuleLinker::run() {
     if (C.getSelectionKind() == Comdat::Any)
       continue;
     const GlobalValue *GV = SrcM->getNamedValue(C.getName());
-    assert(GV);
-    MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer);
+    if (GV)
+      MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer);
   }
 
   // Strip replaced subprograms before mapping any metadata -- so that we're
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index c9df8fc..e925bc2 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -116,8 +116,6 @@ class ELFObjectWriter : public MCObjectWriter {
     unsigned StringTableIndex;
     // This holds the .symtab section index.
     unsigned SymbolTableIndex;
-    // This holds the .symtab_shndx section index.
-    unsigned SymtabShndxSectionIndex = 0;
 
     // Sections in the order they are to be output in the section table.
     std::vector<const MCSectionELF *> SectionTable;
@@ -144,7 +142,6 @@ class ELFObjectWriter : public MCObjectWriter {
       Renames.clear();
       Relocations.clear();
       StrTabBuilder.clear();
-      SymtabShndxSectionIndex = 0;
       SectionTable.clear();
       MCObjectWriter::reset();
     }
@@ -232,7 +229,7 @@ class ELFObjectWriter : public MCObjectWriter {
                       uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
                       const MCSectionELF &Section);
   };
-} // namespace
+}
 
 void ELFObjectWriter::align(unsigned Alignment) {
   uint64_t Padding = OffsetToAlignment(OS.tell(), Alignment);
@@ -787,10 +784,15 @@ void ELFObjectWriter::computeSymbolTable(
                     Renames.count(&Symbol)))
       continue;
 
+    if (Symbol.isTemporary() && Symbol.isUndefined())
+      Ctx.reportFatalError(SMLoc(), "Undefined temporary");
+
     ELFSymbolData MSD;
     MSD.Symbol = cast<MCSymbolELF>(&Symbol);
 
     bool Local = Symbol.getBinding() == ELF::STB_LOCAL;
+    assert(Local || !Symbol.isTemporary());
+
     if (Symbol.isAbsolute()) {
       MSD.SectionIndex = ELF::SHN_ABS;
     } else if (Symbol.isCommon()) {
@@ -837,12 +839,12 @@ void ELFObjectWriter::computeSymbolTable(
     // seems that this information is not easily accessible from the
     // ELFObjectWriter.
     StringRef Name = Symbol.getName();
+    SmallString<32> Buf;
     if (!Name.startswith("?") && !Name.startswith("@?") &&
         !Name.startswith("__imp_?") && !Name.startswith("__imp_@?")) {
       // This symbol isn't following the MSVC C++ name mangling convention. We
       // can thus safely interpret the @@@ in symbol names as specifying symbol
       // versioning.
-      SmallString<32> Buf;
       size_t Pos = Name.find("@@@");
       if (Pos != StringRef::npos) {
         Buf += Name.substr(0, Pos);
@@ -862,6 +864,9 @@ void ELFObjectWriter::computeSymbolTable(
       ExternalSymbolData.push_back(MSD);
   }
 
+  // This holds the .symtab_shndx section index.
+  unsigned SymtabShndxSectionIndex = 0;
+
   if (HasLargeSectionIndex) {
     MCSectionELF *SymtabShndxSection =
         Ctx.getELFSection(".symtab_shndxr", ELF::SHT_SYMTAB_SHNDX, 0, 4, "");
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 34211aa..da6516a 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -54,8 +54,8 @@ STATISTIC(FragmentLayouts, "Number of fragment layouts");
 STATISTIC(ObjectBytes, "Number of emitted object file bytes");
 STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
 STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
-} // namespace stats
-} // namespace
+}
+}
 
 // FIXME FIXME FIXME: There are number of places in this file where we convert
 // what is a 64-bit assembler value used for computation into a value in the
@@ -254,7 +254,7 @@ uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
     else { // EndOfFragment > BundleSize
       return 2 * BundleSize - EndOfFragment;
     }
-  } else if (EndOfFragment > BundleSize)
+  } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
     return BundleSize - OffsetInBundle;
   else
     return 0;
@@ -581,16 +581,22 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
   // size won't include the padding.
   //
   // When the -mc-relax-all flag is used, we optimize bundling by writting the
-  // bundle padding directly into fragments when the instructions are emitted
-  // inside the streamer.
+  // padding directly into fragments when the instructions are emitted inside
+  // the streamer. When the fragment is larger than the bundle size, we need to
+  // ensure that it's bundle aligned. This means that if we end up with
+  // multiple fragments, we must emit bundle padding between fragments.
+  //
+  // ".align N" is an example of a directive that introduces multiple
+  // fragments. We could add a special case to handle ".align N" by emitting
+  // within-fragment padding (which would produce less padding when N is less
+  // than the bundle size), but for now we don't.
   //
-  if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() &&
-      F->hasInstructions()) {
+  if (Assembler.isBundlingEnabled() && F->hasInstructions()) {
     assert(isa<MCEncodedFragment>(F) &&
            "Only MCEncodedFragment implementations have instructions");
     uint64_t FSize = Assembler.computeFragmentSize(*this, *F);
 
-    if (FSize > Assembler.getBundleAlignSize())
+    if (!Assembler.getRelaxAll() && FSize > Assembler.getBundleAlignSize())
       report_fatal_error("Fragment can't be larger than a bundle size");
 
     uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F,
diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index b9aebfc..68948d3 100644
--- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -193,4 +193,4 @@ MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
   return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
                                   SymbolLookUp, DisInfo);
 }
-} // namespace llvm
+}
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 4ae2bcf..c84c486 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -1461,7 +1461,7 @@ namespace {
     bool IsSignalFrame;
     bool IsSimple;
   };
-} // namespace
+}
 
 namespace llvm {
   template <>
@@ -1488,7 +1488,7 @@ namespace llvm {
         LHS.IsSimple == RHS.IsSimple;
     }
   };
-} // namespace llvm
+}
 
 void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
                                bool IsEH) {
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index b16245a..a30ceec 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -752,7 +752,7 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
     case MCBinaryExpr::Mul:  Result = LHS * RHS; break;
     case MCBinaryExpr::NE:   Result = LHS != RHS; break;
     case MCBinaryExpr::Or:   Result = LHS | RHS; break;
-    case MCBinaryExpr::Shl:  Result = LHS << RHS; break;
+    case MCBinaryExpr::Shl:  Result = uint64_t(LHS) << uint64_t(RHS); break;
     case MCBinaryExpr::Sub:  Result = LHS - RHS; break;
     case MCBinaryExpr::Xor:  Result = LHS ^ RHS; break;
     }
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index e0f610b..eb2d912 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -36,7 +36,7 @@ namespace {
     void EmitGPRel32Value(const MCExpr *Value) override {}
   };
 
-} // namespace
+}
 
 MCStreamer *llvm::createNullStreamer(MCContext &Context) {
   return new MCNullStreamer(Context);
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index aa3d965..576827a 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -733,6 +733,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
       ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
                    COFF::IMAGE_SCN_MEM_WRITE,
       SectionKind::getDataRel());
+	  
+  StackMapSection = Ctx->getCOFFSection(".llvm_stackmaps",
+                                        COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                            COFF::IMAGE_SCN_MEM_READ,
+                                        SectionKind::getReadOnly());	
 }
 
 void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index a73c171..0a63777 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -124,6 +124,7 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
                                      const SMLoc &Loc) {
   MCStreamer::EmitValueImpl(Value, Size, Loc);
   MCDataFragment *DF = getOrCreateDataFragment();
+  flushPendingLabels(DF, DF->getContents().size());
 
   MCLineEntry::Make(this, getCurrentSection().first);
 
@@ -362,7 +363,9 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
 
 void MCObjectStreamer::EmitBytes(StringRef Data) {
   MCLineEntry::Make(this, getCurrentSection().first);
-  getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+  MCDataFragment *DF = getOrCreateDataFragment();
+  flushPendingLabels(DF, DF->getContents().size());
+  DF->getContents().append(Data.begin(), Data.end());
 }
 
 void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
@@ -410,6 +413,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
 // Associate GPRel32 fixup with data and resize data area
 void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
   MCDataFragment *DF = getOrCreateDataFragment();
+  flushPendingLabels(DF, DF->getContents().size());
 
   DF->getFixups().push_back(MCFixup::create(DF->getContents().size(), 
                                             Value, FK_GPRel_4));
@@ -419,6 +423,7 @@ void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
 // Associate GPRel32 fixup with data and resize data area
 void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
   MCDataFragment *DF = getOrCreateDataFragment();
+  flushPendingLabels(DF, DF->getContents().size());
 
   DF->getFixups().push_back(MCFixup::create(DF->getContents().size(), 
                                             Value, FK_GPRel_4));
@@ -428,7 +433,9 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
 void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) {
   // FIXME: A MCFillFragment would be more memory efficient but MCExpr has
   //        problems evaluating expressions across multiple fragments.
-  getOrCreateDataFragment()->getContents().append(NumBytes, FillValue);
+  MCDataFragment *DF = getOrCreateDataFragment();
+  flushPendingLabels(DF, DF->getContents().size());
+  DF->getContents().append(NumBytes, FillValue);
 }
 
 void MCObjectStreamer::EmitZeros(uint64_t NumBytes) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 9c1062f..04d1413 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -26,6 +26,7 @@
 #include "llvm/MC/MCParser/AsmCond.h"
 #include "llvm/MC/MCParser/AsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserUtils.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionMachO.h"
@@ -233,6 +234,8 @@ public:
   bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
   bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
+  bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
+                             SMLoc &EndLoc) override;
   bool parseAbsoluteExpression(int64_t &Res) override;
 
   /// \brief Parse an identifier or string (as a quoted identifier)
@@ -484,7 +487,7 @@ private:
 
   void initializeDirectiveKindMap();
 };
-} // namespace
+}
 
 namespace llvm {
 
@@ -1065,6 +1068,27 @@ bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
   return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
 }
 
+bool AsmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
+                                      SMLoc &EndLoc) {
+  if (parseParenExpr(Res, EndLoc))
+    return true;
+
+  for (; ParenDepth > 0; --ParenDepth) {
+    if (parseBinOpRHS(1, Res, EndLoc))
+      return true;
+
+    // We don't Lex() the last RParen.
+    // This is the same behavior as parseParenExpression().
+    if (ParenDepth - 1 > 0) {
+      if (Lexer.isNot(AsmToken::RParen))
+        return TokError("expected ')' in parentheses expression");
+      EndLoc = Lexer.getTok().getEndLoc();
+      Lex();
+    }
+  }
+  return false;
+}
+
 bool AsmParser::parseAbsoluteExpression(int64_t &Res) {
   const MCExpr *Expr;
 
@@ -1622,8 +1646,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
     if (ActiveMacros.empty())
       Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
     else
-      Line = SrcMgr.FindLineNumber(ActiveMacros.back()->InstantiationLoc,
-                                   ActiveMacros.back()->ExitBuffer);
+      Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
+                                   ActiveMacros.front()->ExitBuffer);
 
     // If we previously parsed a cpp hash file line comment then make sure the
     // current Dwarf File is for the CppHashFilename if not then emit the
@@ -1944,7 +1968,7 @@ public:
 private:
   AsmLexer &Lexer;
 };
-} // namespace
+}
 
 bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
 
@@ -2178,82 +2202,20 @@ void AsmParser::handleMacroExit() {
   ActiveMacros.pop_back();
 }
 
-static bool isUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
-  switch (Value->getKind()) {
-  case MCExpr::Binary: {
-    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value);
-    return isUsedIn(Sym, BE->getLHS()) || isUsedIn(Sym, BE->getRHS());
-  }
-  case MCExpr::Target:
-  case MCExpr::Constant:
-    return false;
-  case MCExpr::SymbolRef: {
-    const MCSymbol &S =
-        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
-    if (S.isVariable())
-      return isUsedIn(Sym, S.getVariableValue());
-    return &S == Sym;
-  }
-  case MCExpr::Unary:
-    return isUsedIn(Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr());
-  }
-
-  llvm_unreachable("Unknown expr kind!");
-}
-
 bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
                                 bool NoDeadStrip) {
-  // FIXME: Use better location, we should use proper tokens.
-  SMLoc EqualLoc = Lexer.getLoc();
-
+  MCSymbol *Sym;
   const MCExpr *Value;
-  if (parseExpression(Value))
+  if (MCParserUtils::parseAssignmentExpression(Name, allow_redef, *this, Sym,
+                                               Value))
     return true;
 
-  // Note: we don't count b as used in "a = b". This is to allow
-  // a = b
-  // b = c
-
-  if (Lexer.isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in assignment");
-
-  // Eat the end of statement marker.
-  Lex();
-
-  // Validate that the LHS is allowed to be a variable (either it has not been
-  // used as a symbol, or it is an absolute symbol).
-  MCSymbol *Sym = getContext().lookupSymbol(Name);
-  if (Sym) {
-    // Diagnose assignment to a label.
-    //
-    // FIXME: Diagnostics. Note the location of the definition as a label.
-    // FIXME: Diagnose assignment to protected identifier (e.g., register name).
-    if (isUsedIn(Sym, Value))
-      return Error(EqualLoc, "Recursive use of '" + Name + "'");
-    else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
-      ; // Allow redefinitions of undefined symbols only used in directives.
-    else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
-      ; // Allow redefinitions of variables that haven't yet been used.
-    else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
-      return Error(EqualLoc, "redefinition of '" + Name + "'");
-    else if (!Sym->isVariable())
-      return Error(EqualLoc, "invalid assignment to '" + Name + "'");
-    else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
-      return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
-                                 Name + "'");
-
-    // Don't count these checks as uses.
-    Sym->setUsed(false);
-  } else if (Name == ".") {
-    if (Out.EmitValueToOffset(Value, 0)) {
-      Error(EqualLoc, "expected absolute expression");
-      eatToEndOfStatement();
-    }
+  if (!Sym) {
+    // In the case where we parse an expression starting with a '.', we will
+    // not generate an error, nor will we create a symbol.  In this case we
+    // should just return out.
     return false;
-  } else
-    Sym = getContext().getOrCreateSymbol(Name);
-
-  Sym->setRedefinable(allow_redef);
+  }
 
   // Do the assignment.
   Out.EmitAssignment(Sym, Value);
@@ -4777,6 +4739,103 @@ bool AsmParser::parseMSInlineAsm(
   return false;
 }
 
+namespace llvm {
+namespace MCParserUtils {
+
+/// Returns whether the given symbol is used anywhere in the given expression,
+/// or subexpressions.
+static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) {
+  switch (Value->getKind()) {
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value);
+    return isSymbolUsedInExpression(Sym, BE->getLHS()) ||
+           isSymbolUsedInExpression(Sym, BE->getRHS());
+  }
+  case MCExpr::Target:
+  case MCExpr::Constant:
+    return false;
+  case MCExpr::SymbolRef: {
+    const MCSymbol &S =
+        static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
+    if (S.isVariable())
+      return isSymbolUsedInExpression(Sym, S.getVariableValue());
+    return &S == Sym;
+  }
+  case MCExpr::Unary:
+    return isSymbolUsedInExpression(
+        Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr());
+  }
+
+  llvm_unreachable("Unknown expr kind!");
+}
+
+bool parseAssignmentExpression(StringRef Name, bool allow_redef,
+                               MCAsmParser &Parser, MCSymbol *&Sym,
+                               const MCExpr *&Value) {
+  MCAsmLexer &Lexer = Parser.getLexer();
+
+  // FIXME: Use better location, we should use proper tokens.
+  SMLoc EqualLoc = Lexer.getLoc();
+
+  if (Parser.parseExpression(Value)) {
+    Parser.TokError("missing expression");
+    Parser.eatToEndOfStatement();
+    return true;
+  }
+
+  // Note: we don't count b as used in "a = b". This is to allow
+  // a = b
+  // b = c
+
+  if (Lexer.isNot(AsmToken::EndOfStatement))
+    return Parser.TokError("unexpected token in assignment");
+
+  // Eat the end of statement marker.
+  Parser.Lex();
+
+  // Validate that the LHS is allowed to be a variable (either it has not been
+  // used as a symbol, or it is an absolute symbol).
+  Sym = Parser.getContext().lookupSymbol(Name);
+  if (Sym) {
+    // Diagnose assignment to a label.
+    //
+    // FIXME: Diagnostics. Note the location of the definition as a label.
+    // FIXME: Diagnose assignment to protected identifier (e.g., register name).
+    if (isSymbolUsedInExpression(Sym, Value))
+      return Parser.Error(EqualLoc, "Recursive use of '" + Name + "'");
+    else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+      ; // Allow redefinitions of undefined symbols only used in directives.
+    else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
+      ; // Allow redefinitions of variables that haven't yet been used.
+    else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
+      return Parser.Error(EqualLoc, "redefinition of '" + Name + "'");
+    else if (!Sym->isVariable())
+      return Parser.Error(EqualLoc, "invalid assignment to '" + Name + "'");
+    else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
+      return Parser.Error(EqualLoc,
+                          "invalid reassignment of non-absolute variable '" +
+                              Name + "'");
+
+    // Don't count these checks as uses.
+    Sym->setUsed(false);
+  } else if (Name == ".") {
+    if (Parser.getStreamer().EmitValueToOffset(Value, 0)) {
+      Parser.Error(EqualLoc, "expected absolute expression");
+      Parser.eatToEndOfStatement();
+      return true;
+    }
+    return false;
+  } else
+    Sym = Parser.getContext().getOrCreateSymbol(Name);
+
+  Sym->setRedefinable(allow_redef);
+
+  return false;
+}
+
+} // namespace MCParserUtils
+} // namespace llvm
+
 /// \brief Create an MCAsmParser instance.
 MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C,
                                      MCStreamer &Out, const MCAsmInfo &MAI) {
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 1480f5b..f09bce0 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -145,7 +145,7 @@ public:
   COFFAsmParser() {}
 };
 
-} // namespace
+} // end annonomous namespace.
 
 static SectionKind computeSectionKind(unsigned Flags) {
   if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index e131b23..5f8a603 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -154,7 +154,7 @@ private:
   unsigned parseSunStyleSectionFlags();
 };
 
-} // namespace
+}
 
 /// ParseDirectiveSymbolAttribute
 ///  ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
@@ -593,10 +593,16 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
     Lex();
 
   if (getLexer().isNot(AsmToken::Identifier) &&
-      getLexer().isNot(AsmToken::Hash) && getLexer().isNot(AsmToken::At) &&
-      getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::String))
-    return TokError("expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '@<type>', "
-                    "'%<type>' or \"<type>\"");
+      getLexer().isNot(AsmToken::Hash) &&
+      getLexer().isNot(AsmToken::Percent) &&
+      getLexer().isNot(AsmToken::String)) {
+    if (!getLexer().getAllowAtInIdentifier())
+      return TokError("expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', "
+                      "'%<type>' or \"<type>\"");
+    else if (getLexer().isNot(AsmToken::At))
+      return TokError("expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '@<type>', "
+                      "'%<type>' or \"<type>\"");
+  }
 
   if (getLexer().isNot(AsmToken::String) &&
       getLexer().isNot(AsmToken::Identifier))
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 4484221..affc574 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -19,6 +19,9 @@ using namespace llvm;
 // Sentinel value for the absolute pseudo section.
 MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1);
 
+const unsigned MCSymbol::NumCommonAlignmentBits;
+const unsigned MCSymbol::NumFlagsBits;
+
 void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name,
                              MCContext &Ctx) {
   // We may need more space for a Name to account for alignment.  So allocate
@@ -40,8 +43,12 @@ void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name,
 void MCSymbol::setVariableValue(const MCExpr *Value) {
   assert(!IsUsed && "Cannot set a variable that has already been used.");
   assert(Value && "Invalid variable value!");
+  assert((SymbolContents == SymContentsUnset ||
+          SymbolContents == SymContentsVariable) &&
+         "Cannot give common/offset symbol a variable value");
   this->Value = Value;
-  SectionOrFragment = nullptr;
+  SymbolContents = SymContentsVariable;
+  setUndefined();
 }
 
 void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp
index 6ec70ed..ec7ef44 100644
--- a/lib/MC/MCSymbolELF.cpp
+++ b/lib/MC/MCSymbolELF.cpp
@@ -38,7 +38,7 @@ enum {
   // One bit.
   ELF_BindingSet_Shift = 12
 };
-} // namespace
+}
 
 void MCSymbolELF::setBinding(unsigned Binding) const {
   setIsBindingSet();
@@ -198,4 +198,4 @@ void MCSymbolELF::setIsBindingSet() const {
 bool MCSymbolELF::isBindingSet() const {
   return getFlags() & (0x1 << ELF_BindingSet_Shift);
 }
-} // namespace llvm
+}
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index d8280c7..1b73b7a 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -247,6 +247,6 @@ void UnwindEmitter::EmitUnwindInfo(MCStreamer &Streamer,
 
   llvm::EmitUnwindInfo(Streamer, info);
 }
-} // namespace Win64EH
+}
 } // End of namespace llvm
 
diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp
index 9cf2edf..d5d9ead 100644
--- a/lib/MC/MCWinEH.cpp
+++ b/lib/MC/MCWinEH.cpp
@@ -74,6 +74,6 @@ MCSection *UnwindEmitter::getXDataSection(const MCSymbol *Function,
   return getUnwindInfoSection(".xdata", XData, Function, Context);
 }
 
-} // namespace WinEH
-} // namespace llvm
+}
+}
 
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 5bc1404..56ef1c7 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -191,7 +191,7 @@ public:
 
   void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
 };
-} // namespace
+}
 
 static inline void write_uint32_le(void *Data, uint32_t Value) {
   support::endian::write<uint32_t, support::little, support::unaligned>(Data,
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 4ecdc3b..36dd691 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -291,5 +291,5 @@ LLVM_ATTRIBUTE_NORETURN
 void MCWinCOFFStreamer::FatalError(const Twine &Msg) const {
   getContext().reportFatalError(SMLoc(), Msg);
 }
-} // namespace llvm
+}
 
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index de80918..8f10143 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMObject
   ObjectFile.cpp
   RecordStreamer.cpp
   SymbolicFile.cpp
+  SymbolSize.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Object
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index e2f559e..64bb0d5 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/COFF.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -144,68 +145,62 @@ void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const {
   }
 }
 
-std::error_code COFFObjectFile::getSymbolName(DataRefImpl Ref,
-                                              StringRef &Result) const {
+ErrorOr<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const {
   COFFSymbolRef Symb = getCOFFSymbol(Ref);
-  return getSymbolName(Symb, Result);
+  StringRef Result;
+  std::error_code EC = getSymbolName(Symb, Result);
+  if (EC)
+    return EC;
+  return Result;
+}
+
+uint64_t COFFObjectFile::getSymbolValue(DataRefImpl Ref) const {
+  COFFSymbolRef Sym = getCOFFSymbol(Ref);
+
+  if (Sym.isAnyUndefined() || Sym.isCommon())
+    return UnknownAddress;
+
+  return Sym.getValue();
 }
 
 std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
                                                  uint64_t &Result) const {
+  Result = getSymbolValue(Ref);
   COFFSymbolRef Symb = getCOFFSymbol(Ref);
-
-  if (Symb.isAnyUndefined()) {
-    Result = UnknownAddressOrSize;
-    return std::error_code();
-  }
-  if (Symb.isCommon()) {
-    Result = UnknownAddressOrSize;
-    return std::error_code();
-  }
   int32_t SectionNumber = Symb.getSectionNumber();
-  if (!COFF::isReservedSectionNumber(SectionNumber)) {
-    const coff_section *Section = nullptr;
-    if (std::error_code EC = getSection(SectionNumber, Section))
-      return EC;
 
-    Result = Section->VirtualAddress + Symb.getValue();
+  if (Symb.isAnyUndefined() || Symb.isCommon() ||
+      COFF::isReservedSectionNumber(SectionNumber))
     return std::error_code();
-  }
 
-  Result = Symb.getValue();
+  const coff_section *Section = nullptr;
+  if (std::error_code EC = getSection(SectionNumber, Section))
+    return EC;
+  Result += Section->VirtualAddress;
   return std::error_code();
 }
 
-std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
-                                              SymbolRef::Type &Result) const {
+SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const {
   COFFSymbolRef Symb = getCOFFSymbol(Ref);
   int32_t SectionNumber = Symb.getSectionNumber();
-  Result = SymbolRef::ST_Other;
-
-  if (Symb.isAnyUndefined()) {
-    Result = SymbolRef::ST_Unknown;
-  } else if (Symb.isFunctionDefinition()) {
-    Result = SymbolRef::ST_Function;
-  } else if (Symb.isCommon()) {
-    Result = SymbolRef::ST_Data;
-  } else if (Symb.isFileRecord()) {
-    Result = SymbolRef::ST_File;
-  } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG ||
-             Symb.isSectionDefinition()) {
-    // TODO: perhaps we need a new symbol type ST_Section.
-    Result = SymbolRef::ST_Debug;
-  } else if (!COFF::isReservedSectionNumber(SectionNumber)) {
-    const coff_section *Section = nullptr;
-    if (std::error_code EC = getSection(SectionNumber, Section))
-      return EC;
-    uint32_t Characteristics = Section->Characteristics;
-    if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
-      Result = SymbolRef::ST_Function;
-    else if (Characteristics & (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
-                                COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA))
-      Result = SymbolRef::ST_Data;
-  }
-  return std::error_code();
+
+  if (Symb.isAnyUndefined())
+    return SymbolRef::ST_Unknown;
+  if (Symb.isFunctionDefinition())
+    return SymbolRef::ST_Function;
+  if (Symb.isCommon())
+    return SymbolRef::ST_Data;
+  if (Symb.isFileRecord())
+    return SymbolRef::ST_File;
+
+  // TODO: perhaps we need a new symbol type ST_Section.
+  if (SectionNumber == COFF::IMAGE_SYM_DEBUG || Symb.isSectionDefinition())
+    return SymbolRef::ST_Debug;
+
+  if (!COFF::isReservedSectionNumber(SectionNumber))
+    return SymbolRef::ST_Data;
+
+  return SymbolRef::ST_Other;
 }
 
 uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
@@ -236,12 +231,9 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
   return Result;
 }
 
-uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Ref) const {
+uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const {
   COFFSymbolRef Symb = getCOFFSymbol(Ref);
-
-  if (Symb.isCommon())
-    return Symb.getValue();
-  return UnknownAddressOrSize;
+  return Symb.getValue();
 }
 
 std::error_code
@@ -261,6 +253,11 @@ COFFObjectFile::getSymbolSection(DataRefImpl Ref,
   return std::error_code();
 }
 
+unsigned COFFObjectFile::getSymbolSectionID(SymbolRef Sym) const {
+  COFFSymbolRef Symb = getCOFFSymbol(Sym.getRawDataRefImpl());
+  return Symb.getSectionNumber();
+}
+
 void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const {
   const coff_section *Sec = toSec(Ref);
   Sec += 1;
@@ -314,6 +311,13 @@ bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const {
   return (Sec->Characteristics & BssFlags) == BssFlags;
 }
 
+unsigned COFFObjectFile::getSectionID(SectionRef Sec) const {
+  uintptr_t Offset =
+      uintptr_t(Sec.getRawDataRefImpl().p) - uintptr_t(SectionTable);
+  assert((Offset % sizeof(coff_section)) == 0);
+  return (Offset / sizeof(coff_section)) + 1;
+}
+
 bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const {
   const coff_section *Sec = toSec(Ref);
   // In COFF, a virtual section won't have any in-file 
@@ -321,14 +325,6 @@ bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const {
   return Sec->PointerToRawData == 0;
 }
 
-bool COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
-                                           DataRefImpl SymbRef) const {
-  const coff_section *Sec = toSec(SecRef);
-  COFFSymbolRef Symb = getCOFFSymbol(SymbRef);
-  int32_t SecNumber = (Sec - SectionTable) + 1;
-  return SecNumber == Symb.getSectionNumber();
-}
-
 static uint32_t getNumberOfRelocations(const coff_section *Sec,
                                        MemoryBufferRef M, const uint8_t *base) {
   // The field for the number of relocations in COFF section table is only
@@ -846,20 +842,24 @@ std::error_code COFFObjectFile::getString(uint32_t Offset,
 
 std::error_code COFFObjectFile::getSymbolName(COFFSymbolRef Symbol,
                                               StringRef &Res) const {
+  return getSymbolName(Symbol.getGeneric(), Res);
+}
+
+std::error_code COFFObjectFile::getSymbolName(const coff_symbol_generic *Symbol,
+                                              StringRef &Res) const {
   // Check for string table entry. First 4 bytes are 0.
-  if (Symbol.getStringTableOffset().Zeroes == 0) {
-    uint32_t Offset = Symbol.getStringTableOffset().Offset;
-    if (std::error_code EC = getString(Offset, Res))
+  if (Symbol->Name.Offset.Zeroes == 0) {
+    if (std::error_code EC = getString(Symbol->Name.Offset.Offset, Res))
       return EC;
     return std::error_code();
   }
 
-  if (Symbol.getShortName()[COFF::NameSize - 1] == 0)
+  if (Symbol->Name.ShortName[COFF::NameSize - 1] == 0)
     // Null terminated, let ::strlen figure out the length.
-    Res = StringRef(Symbol.getShortName());
+    Res = StringRef(Symbol->Name.ShortName);
   else
     // Not null terminated, use all 8 bytes.
-    Res = StringRef(Symbol.getShortName(), COFF::NameSize);
+    Res = StringRef(Symbol->Name.ShortName, COFF::NameSize);
   return std::error_code();
 }
 
@@ -961,20 +961,13 @@ void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
             reinterpret_cast<const coff_relocation*>(Rel.p) + 1);
 }
 
-std::error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
-                                                     uint64_t &Res) const {
+ErrorOr<uint64_t> COFFObjectFile::getRelocationAddress(DataRefImpl Rel) const {
   report_fatal_error("getRelocationAddress not implemented in COFFObjectFile");
 }
 
-std::error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
-                                                    uint64_t &Res) const {
+uint64_t COFFObjectFile::getRelocationOffset(DataRefImpl Rel) const {
   const coff_relocation *R = toRel(Rel);
-  const support::ulittle32_t *VirtualAddressPtr;
-  if (std::error_code EC =
-          getObject(VirtualAddressPtr, Data, &R->VirtualAddress))
-    return EC;
-  Res = *VirtualAddressPtr;
-  return std::error_code();
+  return R->VirtualAddress;
 }
 
 symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
@@ -991,11 +984,9 @@ symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
   return symbol_iterator(SymbolRef(Ref, this));
 }
 
-std::error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
-                                                  uint64_t &Res) const {
+uint64_t COFFObjectFile::getRelocationType(DataRefImpl Rel) const {
   const coff_relocation* R = toRel(Rel);
-  Res = R->Type;
-  return std::error_code();
+  return R->Type;
 }
 
 const coff_section *
@@ -1020,14 +1011,22 @@ COFFObjectFile::getCOFFRelocation(const RelocationRef &Reloc) const {
   return toRel(Reloc.getRawDataRefImpl());
 }
 
+iterator_range<const coff_relocation *>
+COFFObjectFile::getRelocations(const coff_section *Sec) const {
+  const coff_relocation *I = getFirstReloc(Sec, Data, base());
+  const coff_relocation *E = I;
+  if (I)
+    E += getNumberOfRelocations(Sec, Data, base());
+  return make_range(I, E);
+}
+
 #define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(reloc_type)                           \
   case COFF::reloc_type:                                                       \
     Res = #reloc_type;                                                         \
     break;
 
-std::error_code
-COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
-                                      SmallVectorImpl<char> &Result) const {
+void COFFObjectFile::getRelocationTypeName(
+    DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
   const coff_relocation *Reloc = toRel(Rel);
   StringRef Res;
   switch (getMachine()) {
@@ -1096,7 +1095,6 @@ COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
     Res = "Unknown";
   }
   Result.append(Res.begin(), Res.end());
-  return std::error_code();
 }
 
 #undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME
diff --git a/lib/Object/COFFYAML.cpp b/lib/Object/COFFYAML.cpp
index dda4b7f..9a24b53 100644
--- a/lib/Object/COFFYAML.cpp
+++ b/lib/Object/COFFYAML.cpp
@@ -335,7 +335,7 @@ struct NDLLCharacteristics {
   COFF::DLLCharacteristics Characteristics;
 };
 
-} // namespace
+}
 
 void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
                                                   COFFYAML::Relocation &Rel) {
@@ -497,5 +497,5 @@ void MappingTraits<COFFYAML::Object>::mapping(IO &IO, COFFYAML::Object &Obj) {
   IO.mapRequired("symbols", Obj.Symbols);
 }
 
-} // namespace yaml
-} // namespace llvm
+}
+}
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index 50730a9..ecdd468 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -590,7 +590,7 @@ struct NormalizedOther {
   ELFYAML::ELF_STV Visibility;
   ELFYAML::ELF_STO Other;
 };
-} // namespace
+}
 
 void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
   IO.mapOptional("Name", Symbol.Name, StringRef());
@@ -723,7 +723,7 @@ struct NormalizedMips64RelType {
   ELFYAML::ELF_REL Type3;
   ELFYAML::ELF_RSS SpecSym;
 };
-} // namespace
+}
 
 void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
                                                  ELFYAML::Relocation &Rel) {
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
index 644a178..7ca2f12 100644
--- a/lib/Object/Error.cpp
+++ b/lib/Object/Error.cpp
@@ -41,6 +41,10 @@ std::string _object_error_category::message(int EV) const {
     return "Invalid data was encountered while parsing the file";
   case object_error::unexpected_eof:
     return "The end of the file was unexpectedly encountered";
+  case object_error::string_table_non_null_end:
+    return "String table must end with a null terminator";
+  case object_error::invalid_section_index:
+    return "Invalid section index";
   case object_error::bitcode_section_not_found:
     return "Bitcode section not found in object file";
   case object_error::macho_small_load_command:
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index e90e08d..9f5132e 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -37,9 +37,7 @@ using namespace object;
 
 IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
     : SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) {
-  // Setup a mangler with the DataLayout.
-  const DataLayout &DL = M->getDataLayout();
-  Mang.reset(new Mangler(&DL));
+  Mang.reset(new Mangler());
 
   const std::string &InlineAsm = M->getModuleInlineAsm();
   if (InlineAsm.empty())
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index f76dd0d..4255ed7 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -327,16 +327,14 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
   Symb.p += SymbolTableEntrySize;
 }
 
-std::error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
-                                               StringRef &Res) const {
+ErrorOr<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const {
   StringRef StringTable = getStringTableData();
   MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
   const char *Start = &StringTable.data()[Entry.n_strx];
   if (Start < getData().begin() || Start >= getData().end())
     report_fatal_error(
         "Symbol name entry points before beginning or past end of file.");
-  Res = StringRef(Start);
-  return std::error_code();
+  return StringRef(Start);
 }
 
 unsigned MachOObjectFile::getSectionType(SectionRef Sec) const {
@@ -345,23 +343,24 @@ unsigned MachOObjectFile::getSectionType(SectionRef Sec) const {
   return Flags & MachO::SECTION_TYPE;
 }
 
+uint64_t MachOObjectFile::getNValue(DataRefImpl Sym) const {
+  if (is64Bit()) {
+    MachO::nlist_64 Entry = getSymbol64TableEntry(Sym);
+    return Entry.n_value;
+  }
+  MachO::nlist Entry = getSymbolTableEntry(Sym);
+  return Entry.n_value;
+}
+
 // getIndirectName() returns the name of the alias'ed symbol who's string table
 // index is in the n_value field.
 std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb,
                                                  StringRef &Res) const {
   StringRef StringTable = getStringTableData();
-  uint64_t NValue;
-  if (is64Bit()) {
-    MachO::nlist_64 Entry = getSymbol64TableEntry(Symb);
-    NValue = Entry.n_value;
-    if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR)
-      return object_error::parse_failed;
-  } else {
-    MachO::nlist Entry = getSymbolTableEntry(Symb);
-    NValue = Entry.n_value;
-    if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR)
-      return object_error::parse_failed;
-  }
+  MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+  if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR)
+    return object_error::parse_failed;
+  uint64_t NValue = getNValue(Symb);
   if (NValue >= StringTable.size())
     return object_error::parse_failed;
   const char *Start = &StringTable.data()[NValue];
@@ -369,23 +368,17 @@ std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb,
   return std::error_code();
 }
 
-std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
+uint64_t MachOObjectFile::getSymbolValue(DataRefImpl Sym) const {
+  uint64_t NValue = getNValue(Sym);
+  MachO::nlist_base Entry = getSymbolTableEntryBase(this, Sym);
+  if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0)
+    return UnknownAddress;
+  return NValue;
+}
+
+std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Sym,
                                                   uint64_t &Res) const {
-  if (is64Bit()) {
-    MachO::nlist_64 Entry = getSymbol64TableEntry(Symb);
-    if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF &&
-        Entry.n_value == 0)
-      Res = UnknownAddressOrSize;
-    else
-      Res = Entry.n_value;
-  } else {
-    MachO::nlist Entry = getSymbolTableEntry(Symb);
-    if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF &&
-        Entry.n_value == 0)
-      Res = UnknownAddressOrSize;
-    else
-      Res = Entry.n_value;
-  }
+  Res = getSymbolValue(Sym);
   return std::error_code();
 }
 
@@ -398,37 +391,27 @@ uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const {
   return 0;
 }
 
-uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const {
+uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const {
   uint64_t Value;
   getSymbolAddress(DRI, Value);
-  uint32_t flags = getSymbolFlags(DRI);
-  if (flags & SymbolRef::SF_Common)
-    return Value;
-  return UnknownAddressOrSize;
+  return Value;
 }
 
-std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
-                                               SymbolRef::Type &Res) const {
+SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const {
   MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
   uint8_t n_type = Entry.n_type;
 
-  Res = SymbolRef::ST_Other;
-
   // If this is a STAB debugging symbol, we can do nothing more.
-  if (n_type & MachO::N_STAB) {
-    Res = SymbolRef::ST_Debug;
-    return std::error_code();
-  }
+  if (n_type & MachO::N_STAB)
+    return SymbolRef::ST_Debug;
 
   switch (n_type & MachO::N_TYPE) {
     case MachO::N_UNDF :
-      Res = SymbolRef::ST_Unknown;
-      break;
+      return SymbolRef::ST_Unknown;
     case MachO::N_SECT :
-      Res = SymbolRef::ST_Function;
-      break;
+      return SymbolRef::ST_Function;
   }
-  return std::error_code();
+  return SymbolRef::ST_Other;
 }
 
 uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
@@ -453,7 +436,7 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
     if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) {
       uint64_t Value;
       getSymbolAddress(DRI, Value);
-      if (Value && Value != UnknownAddressOrSize)
+      if (Value && Value != UnknownAddress)
         Result |= SymbolRef::SF_Common;
     }
 
@@ -491,6 +474,12 @@ std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
   return std::error_code();
 }
 
+unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const {
+  MachO::nlist_base Entry =
+      getSymbolTableEntryBase(this, Sym.getRawDataRefImpl());
+  return Entry.n_sect - 1;
+}
+
 void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const {
   Sec.d.a++;
 }
@@ -567,27 +556,15 @@ bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const {
           SectionType == MachO::S_GB_ZEROFILL);
 }
 
+unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
+  return Sec.getRawDataRefImpl().d.a;
+}
+
 bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const {
   // FIXME: Unimplemented.
   return false;
 }
 
-bool MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
-                                            DataRefImpl Symb) const {
-  SymbolRef::Type ST;
-  this->getSymbolType(Symb, ST);
-  if (ST == SymbolRef::ST_Unknown)
-    return false;
-
-  uint64_t SectBegin = getSectionAddress(Sec);
-  uint64_t SectEnd = getSectionSize(Sec);
-  SectEnd += SectBegin;
-
-  uint64_t SymAddr;
-  getSymbolAddress(Symb, SymAddr);
-  return (SymAddr >= SectBegin) && (SymAddr < SectEnd);
-}
-
 relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const {
   DataRefImpl Ret;
   Ret.d.a = Sec.d.a;
@@ -616,25 +593,20 @@ void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
   ++Rel.d.b;
 }
 
-std::error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
-                                                      uint64_t &Res) const {
-  uint64_t Offset;
-  getRelocationOffset(Rel, Offset);
+ErrorOr<uint64_t> MachOObjectFile::getRelocationAddress(DataRefImpl Rel) const {
+  uint64_t Offset = getRelocationOffset(Rel);
 
   DataRefImpl Sec;
   Sec.d.a = Rel.d.a;
   uint64_t SecAddress = getSectionAddress(Sec);
-  Res = SecAddress + Offset;
-  return std::error_code();
+  return SecAddress + Offset;
 }
 
-std::error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
-                                                     uint64_t &Res) const {
+uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const {
   assert(getHeader().filetype == MachO::MH_OBJECT &&
          "Only implemented for MH_OBJECT");
   MachO::any_relocation_info RE = getRelocation(Rel);
-  Res = getAnyRelocationAddress(RE);
-  return std::error_code();
+  return getAnyRelocationAddress(RE);
 }
 
 symbol_iterator
@@ -663,19 +635,15 @@ MachOObjectFile::getRelocationSection(DataRefImpl Rel) const {
   return section_iterator(getAnyRelocationSection(getRelocation(Rel)));
 }
 
-std::error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
-                                                   uint64_t &Res) const {
+uint64_t MachOObjectFile::getRelocationType(DataRefImpl Rel) const {
   MachO::any_relocation_info RE = getRelocation(Rel);
-  Res = getAnyRelocationType(RE);
-  return std::error_code();
+  return getAnyRelocationType(RE);
 }
 
-std::error_code
-MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
-                                       SmallVectorImpl<char> &Result) const {
+void MachOObjectFile::getRelocationTypeName(
+    DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
   StringRef res;
-  uint64_t RType;
-  getRelocationType(Rel, RType);
+  uint64_t RType = getRelocationType(Rel);
 
   unsigned Arch = this->getArch();
 
@@ -779,35 +747,6 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
       break;
   }
   Result.append(res.begin(), res.end());
-  return std::error_code();
-}
-
-std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
-                                                     bool &Result) const {
-  unsigned Arch = getArch();
-  uint64_t Type;
-  getRelocationType(Rel, Type);
-
-  Result = false;
-
-  // On arches that use the generic relocations, GENERIC_RELOC_PAIR
-  // is always hidden.
-  if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc) {
-    if (Type == MachO::GENERIC_RELOC_PAIR) Result = true;
-  } else if (Arch == Triple::x86_64) {
-    // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
-    // an X86_64_RELOC_SUBTRACTOR.
-    if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
-      DataRefImpl RelPrev = Rel;
-      RelPrev.d.a--;
-      uint64_t PrevType;
-      getRelocationType(RelPrev, PrevType);
-      if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR)
-        Result = true;
-    }
-  }
-
-  return std::error_code();
 }
 
 uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const {
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index 2705e7d..1d0e69e 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -123,25 +123,13 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source,
   ec = std::error_code();
 }
 
-static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) {
-  switch (Arch) {
-    case Triple::x86:    CTM = MachO::CPU_TYPE_I386; return true;
-    case Triple::x86_64: CTM = MachO::CPU_TYPE_X86_64; return true;
-    case Triple::arm:    CTM = MachO::CPU_TYPE_ARM; return true;
-    case Triple::sparc:  CTM = MachO::CPU_TYPE_SPARC; return true;
-    case Triple::ppc:    CTM = MachO::CPU_TYPE_POWERPC; return true;
-    case Triple::ppc64:  CTM = MachO::CPU_TYPE_POWERPC64; return true;
-    default: return false;
-  }
-}
-
 ErrorOr<std::unique_ptr<MachOObjectFile>>
-MachOUniversalBinary::getObjectForArch(Triple::ArchType Arch) const {
-  MachO::CPUType CTM;
-  if (!getCTMForArch(Arch, CTM))
+MachOUniversalBinary::getObjectForArch(StringRef ArchName) const {
+  if (Triple(ArchName).getArch() == Triple::ArchType::UnknownArch)
     return object_error::arch_not_found;
+
   for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) {
-    if (I->getCPUType() == static_cast<uint32_t>(CTM))
+    if (I->getArchTypeName() == ArchName)
       return I->getAsObjectFile();
   }
   return object_error::arch_not_found;
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 85f2436..945252b 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -173,10 +173,10 @@ void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) {
 
 // SymbolRef accessors
 const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
-  StringRef ret;
-  if (std::error_code ec = (*unwrap(SI))->getName(ret))
-    report_fatal_error(ec.message());
-  return ret.data();
+  ErrorOr<StringRef> Ret = (*unwrap(SI))->getName();
+  if (std::error_code EC = Ret.getError())
+    report_fatal_error(EC.message());
+  return Ret->data();
 }
 
 uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
@@ -187,22 +187,19 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
 }
 
 uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
-  return (*unwrap(SI))->getSize();
+  return (*unwrap(SI))->getCommonSize();
 }
 
 // RelocationRef accessors
 uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
-  uint64_t ret;
-  if (std::error_code ec = (*unwrap(RI))->getAddress(ret))
-    report_fatal_error(ec.message());
-  return ret;
+  ErrorOr<uint64_t> Ret = (*unwrap(RI))->getAddress();
+  if (std::error_code EC = Ret.getError())
+    report_fatal_error(EC.message());
+  return *Ret;
 }
 
 uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
-  uint64_t ret;
-  if (std::error_code ec = (*unwrap(RI))->getOffset(ret))
-    report_fatal_error(ec.message());
-  return ret;
+  return (*unwrap(RI))->getOffset();
 }
 
 LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
@@ -211,18 +208,13 @@ LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
 }
 
 uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) {
-  uint64_t ret;
-  if (std::error_code ec = (*unwrap(RI))->getType(ret))
-    report_fatal_error(ec.message());
-  return ret;
+  return (*unwrap(RI))->getType();
 }
 
 // NOTE: Caller takes ownership of returned string.
 const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
   SmallVector<char, 0> ret;
-  if (std::error_code ec = (*unwrap(RI))->getTypeName(ret))
-    report_fatal_error(ec.message());
-
+  (*unwrap(RI))->getTypeName(ret);
   char *str = static_cast<char*>(malloc(ret.size()));
   std::copy(ret.begin(), ret.end(), str);
   return str;
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index f6667d9..04e4916 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -28,12 +28,19 @@ void ObjectFile::anchor() { }
 ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source)
     : SymbolicFile(Type, Source) {}
 
+bool SectionRef::containsSymbol(SymbolRef S) const {
+  section_iterator SymSec = getObject()->section_end();
+  if (S.getSection(SymSec))
+    return false;
+  return *this == *SymSec;
+}
+
 std::error_code ObjectFile::printSymbolName(raw_ostream &OS,
                                             DataRefImpl Symb) const {
-  StringRef Name;
-  if (std::error_code EC = getSymbolName(Symb, Name))
+  ErrorOr<StringRef> Name = getSymbolName(Symb);
+  if (std::error_code EC = Name.getError())
     return EC;
-  OS << Name;
+  OS << *Name;
   return std::error_code();
 }
 
diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h
index d694a9f..d861061 100644
--- a/lib/Object/RecordStreamer.h
+++ b/lib/Object/RecordStreamer.h
@@ -38,5 +38,5 @@ public:
   void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         unsigned ByteAlignment) override;
 };
-} // namespace llvm
+}
 #endif
diff --git a/lib/Object/SymbolSize.cpp b/lib/Object/SymbolSize.cpp
new file mode 100644
index 0000000..1d5cd78
--- /dev/null
+++ b/lib/Object/SymbolSize.cpp
@@ -0,0 +1,100 @@
+//===- SymbolSize.cpp -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+struct SymEntry {
+  symbol_iterator I;
+  uint64_t Address;
+  unsigned Number;
+  unsigned SectionID;
+};
+}
+
+static int compareAddress(const SymEntry *A, const SymEntry *B) {
+  if (A->SectionID != B->SectionID)
+    return A->SectionID - B->SectionID;
+  return A->Address - B->Address;
+}
+
+static unsigned getSectionID(const ObjectFile &O, SectionRef Sec) {
+  if (auto *M = dyn_cast<MachOObjectFile>(&O))
+    return M->getSectionID(Sec);
+  return cast<COFFObjectFile>(O).getSectionID(Sec);
+}
+
+static unsigned getSymbolSectionID(const ObjectFile &O, SymbolRef Sym) {
+  if (auto *M = dyn_cast<MachOObjectFile>(&O))
+    return M->getSymbolSectionID(Sym);
+  return cast<COFFObjectFile>(O).getSymbolSectionID(Sym);
+}
+
+std::vector<std::pair<SymbolRef, uint64_t>>
+llvm::object::computeSymbolSizes(const ObjectFile &O) {
+  std::vector<std::pair<SymbolRef, uint64_t>> Ret;
+
+  if (const auto *E = dyn_cast<ELFObjectFileBase>(&O)) {
+    auto Syms = E->symbols();
+    if (Syms.begin() == Syms.end())
+      Syms = E->getDynamicSymbolIterators();
+    for (ELFSymbolRef Sym : Syms)
+      Ret.push_back({Sym, Sym.getSize()});
+    return Ret;
+  }
+
+  // Collect sorted symbol addresses. Include dummy addresses for the end
+  // of each section.
+  std::vector<SymEntry> Addresses;
+  unsigned SymNum = 0;
+  for (symbol_iterator I = O.symbol_begin(), E = O.symbol_end(); I != E; ++I) {
+    SymbolRef Sym = *I;
+    uint64_t Value = Sym.getValue();
+    Addresses.push_back({I, Value, SymNum, getSymbolSectionID(O, Sym)});
+    ++SymNum;
+  }
+  for (SectionRef Sec : O.sections()) {
+    uint64_t Address = Sec.getAddress();
+    uint64_t Size = Sec.getSize();
+    Addresses.push_back(
+        {O.symbol_end(), Address + Size, 0, getSectionID(O, Sec)});
+  }
+  array_pod_sort(Addresses.begin(), Addresses.end(), compareAddress);
+
+  // Compute the size as the gap to the next symbol
+  for (unsigned I = 0, N = Addresses.size() - 1; I < N; ++I) {
+    auto &P = Addresses[I];
+    if (P.I == O.symbol_end())
+      continue;
+
+    // If multiple symbol have the same address, give both the same size.
+    unsigned NextI = I + 1;
+    while (NextI < N && Addresses[NextI].Address == P.Address)
+      ++NextI;
+
+    uint64_t Size = Addresses[NextI].Address - P.Address;
+    P.Address = Size;
+  }
+
+  // Assign the sorted symbols in the original order.
+  Ret.resize(SymNum);
+  for (SymEntry &P : Addresses) {
+    if (P.I == O.symbol_end())
+      continue;
+    Ret[P.Number] = {*P.I, P.Address};
+  }
+  return Ret;
+}
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index b771a18..a74ead6 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -33,9 +33,6 @@ void arg_iterator::SkipToNextArg() {
   }
 }
 
-ArgList::~ArgList() {
-}
-
 void ArgList::append(Arg *A) {
   Args.push_back(A);
 }
@@ -318,18 +315,18 @@ const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
 
 //
 
+void InputArgList::releaseMemory() {
+  // An InputArgList always owns its arguments.
+  for (Arg *A : *this)
+    delete A;
+}
+
 InputArgList::InputArgList(const char* const *ArgBegin,
                            const char* const *ArgEnd)
   : NumInputArgStrings(ArgEnd - ArgBegin) {
   ArgStrings.append(ArgBegin, ArgEnd);
 }
 
-InputArgList::~InputArgList() {
-  // An InputArgList always owns its arguments.
-  for (iterator it = begin(), ie = end(); it != ie; ++it)
-    delete *it;
-}
-
 unsigned InputArgList::MakeIndex(StringRef String0) const {
   unsigned Index = ArgStrings.size();
 
@@ -358,8 +355,6 @@ const char *InputArgList::MakeArgStringRef(StringRef Str) const {
 DerivedArgList::DerivedArgList(const InputArgList &BaseArgs)
     : BaseArgs(BaseArgs) {}
 
-DerivedArgList::~DerivedArgList() {}
-
 const char *DerivedArgList::MakeArgStringRef(StringRef Str) const {
   return BaseArgs.MakeArgString(Str);
 }
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index c37f193..e83536f 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -79,8 +79,8 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
 static inline bool operator<(const OptTable::Info &I, const char *Name) {
   return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0;
 }
-} // namespace opt
-} // namespace llvm
+}
+}
 
 OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
 
@@ -247,33 +247,32 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
   return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str);
 }
 
-InputArgList *OptTable::ParseArgs(const char *const *ArgBegin,
-                                  const char *const *ArgEnd,
-                                  unsigned &MissingArgIndex,
-                                  unsigned &MissingArgCount,
-                                  unsigned FlagsToInclude,
-                                  unsigned FlagsToExclude) const {
-  InputArgList *Args = new InputArgList(ArgBegin, ArgEnd);
+InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
+                                 unsigned &MissingArgIndex,
+                                 unsigned &MissingArgCount,
+                                 unsigned FlagsToInclude,
+                                 unsigned FlagsToExclude) const {
+  InputArgList Args(ArgArr.begin(), ArgArr.end());
 
   // FIXME: Handle '@' args (or at least error on them).
 
   MissingArgIndex = MissingArgCount = 0;
-  unsigned Index = 0, End = ArgEnd - ArgBegin;
+  unsigned Index = 0, End = ArgArr.size();
   while (Index < End) {
     // Ingore nullptrs, they are response file's EOL markers
-    if (Args->getArgString(Index) == nullptr) {
+    if (Args.getArgString(Index) == nullptr) {
       ++Index;
       continue;
     }
     // Ignore empty arguments (other things may still take them as arguments).
-    StringRef Str = Args->getArgString(Index);
+    StringRef Str = Args.getArgString(Index);
     if (Str == "") {
       ++Index;
       continue;
     }
 
     unsigned Prev = Index;
-    Arg *A = ParseOneArg(*Args, Index, FlagsToInclude, FlagsToExclude);
+    Arg *A = ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
     assert(Index > Prev && "Parser failed to consume argument.");
 
     // Check for missing argument error.
@@ -285,7 +284,7 @@ InputArgList *OptTable::ParseArgs(const char *const *ArgBegin,
       break;
     }
 
-    Args->append(A);
+    Args.append(A);
   }
 
   return Args;
diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp
index b6c2489..cf04fea 100644
--- a/lib/ProfileData/CoverageMapping.cpp
+++ b/lib/ProfileData/CoverageMapping.cpp
@@ -236,7 +236,7 @@ CoverageMapping::load(CoverageMappingReader &CoverageReader,
 
 ErrorOr<std::unique_ptr<CoverageMapping>>
 CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename,
-                      Triple::ArchType Arch) {
+                      StringRef Arch) {
   auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename);
   if (std::error_code EC = CounterMappingBuff.getError())
     return EC;
@@ -350,7 +350,7 @@ public:
     return Segments;
   }
 };
-} // namespace
+}
 
 std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
   std::vector<StringRef> Filenames;
@@ -521,7 +521,7 @@ class CoverageMappingErrorCategoryType : public std::error_category {
     llvm_unreachable("A value of coveragemap_error has no message.");
   }
 };
-} // namespace
+}
 
 static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory;
 
diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp
index 32de0ba..334a3f5 100644
--- a/lib/ProfileData/CoverageMappingReader.cpp
+++ b/lib/ProfileData/CoverageMappingReader.cpp
@@ -315,7 +315,7 @@ struct SectionData {
     return std::error_code();
   }
 };
-} // namespace
+}
 
 template <typename T, support::endianness Endian>
 std::error_code readCoverageMappingData(
@@ -448,7 +448,7 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
                                         StringRef &CoverageMapping,
                                         uint8_t &BytesInAddress,
                                         support::endianness &Endian,
-                                        Triple::ArchType Arch) {
+                                        StringRef Arch) {
   auto BinOrErr = object::createBinary(ObjectBuffer);
   if (std::error_code EC = BinOrErr.getError())
     return EC;
@@ -465,7 +465,7 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
     // For any other object file, upcast and take ownership.
     OF.reset(cast<object::ObjectFile>(Bin.release()));
     // If we've asked for a particular arch, make sure they match.
-    if (Arch != Triple::ArchType::UnknownArch && OF->getArch() != Arch)
+    if (!Arch.empty() && OF->getArch() != Triple(Arch).getArch())
       return object_error::arch_not_found;
   } else
     // We can only handle object files.
@@ -495,7 +495,7 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
 
 ErrorOr<std::unique_ptr<BinaryCoverageReader>>
 BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
-                             Triple::ArchType Arch) {
+                             StringRef Arch) {
   std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
 
   SectionData Profile;
diff --git a/lib/ProfileData/CoverageMappingWriter.cpp b/lib/ProfileData/CoverageMappingWriter.cpp
index 128003c..d90d2f5 100644
--- a/lib/ProfileData/CoverageMappingWriter.cpp
+++ b/lib/ProfileData/CoverageMappingWriter.cpp
@@ -74,7 +74,7 @@ public:
     return C;
   }
 };
-} // namespace
+}
 
 /// \brief Encode the counter.
 ///
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 805d6d1..92822a7 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -54,7 +54,7 @@ class InstrProfErrorCategoryType : public std::error_category {
     llvm_unreachable("A value of instrprof_error has no message.");
   }
 };
-} // namespace
+}
 
 static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory;
 
diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h
index afd8cfb..ebca7b2 100644
--- a/lib/ProfileData/InstrProfIndexed.h
+++ b/lib/ProfileData/InstrProfIndexed.h
@@ -49,7 +49,7 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) {
 const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
 const uint64_t Version = 2;
 const HashT HashType = HashT::MD5;
-} // namespace IndexedInstrProf
+}
 
 } // end namespace llvm
 
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index 3a5b266..8a529a0 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -15,7 +15,6 @@
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "InstrProfIndexed.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ProfileData/InstrProf.h"
 #include <cassert>
 
 using namespace llvm;
@@ -126,18 +125,16 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
     return error(instrprof_error::malformed);
 
   // Read each counter and fill our internal storage with the values.
-  Counts.clear();
-  Counts.reserve(NumCounters);
+  Record.Counts.clear();
+  Record.Counts.reserve(NumCounters);
   for (uint64_t I = 0; I < NumCounters; ++I) {
     if (Line.is_at_end())
       return error(instrprof_error::truncated);
     uint64_t Count;
     if ((Line++)->getAsInteger(10, Count))
       return error(instrprof_error::malformed);
-    Counts.push_back(Count);
+    Record.Counts.push_back(Count);
   }
-  // Give the record a reference to our internal counter storage.
-  Record.Counts = Counts;
 
   return success();
 }
@@ -280,11 +277,10 @@ RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
   Record.Hash = swap(Data->FuncHash);
   Record.Name = RawName;
   if (ShouldSwapBytes) {
-    Counts.clear();
-    Counts.reserve(RawCounts.size());
+    Record.Counts.clear();
+    Record.Counts.reserve(RawCounts.size());
     for (uint64_t Count : RawCounts)
-      Counts.push_back(swap(Count));
-    Record.Counts = Counts;
+      Record.Counts.push_back(swap(Count));
   } else
     Record.Counts = RawCounts;
 
@@ -303,6 +299,49 @@ InstrProfLookupTrait::ComputeHash(StringRef K) {
   return IndexedInstrProf::ComputeHash(HashType, K);
 }
 
+typedef InstrProfLookupTrait::data_type data_type;
+typedef InstrProfLookupTrait::offset_type offset_type;
+
+data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
+                                         offset_type N) {
+
+  // Check if the data is corrupt. If so, don't try to read it.
+  if (N % sizeof(uint64_t))
+    return data_type();
+
+  DataBuffer.clear();
+  uint64_t NumCounts;
+  uint64_t NumEntries = N / sizeof(uint64_t);
+  std::vector<uint64_t> CounterBuffer;
+  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
+    using namespace support;
+    // The function hash comes first.
+    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
+
+    if (++I >= NumEntries)
+      return data_type();
+
+    // In v1, we have at least one count.
+    // Later, we have the number of counts.
+    NumCounts = (1 == FormatVersion)
+                    ? NumEntries - I
+                    : endian::readNext<uint64_t, little, unaligned>(D);
+    if (1 != FormatVersion)
+      ++I;
+
+    // If we have more counts than data, this is bogus.
+    if (I + NumCounts > NumEntries)
+      return data_type();
+
+    CounterBuffer.clear();
+    for (unsigned J = 0; J < NumCounts; ++J)
+      CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
+
+    DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer)));
+  }
+  return DataBuffer;
+}
+
 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
   if (DataBuffer.getBufferSize() < 8)
     return false;
@@ -342,8 +381,9 @@ std::error_code IndexedInstrProfReader::readHeader() {
   uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
 
   // The rest of the file is an on disk hash table.
-  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
-                                           InstrProfLookupTrait(HashType)));
+  Index.reset(InstrProfReaderIndex::Create(
+      Start + HashOffset, Cur, Start,
+      InstrProfLookupTrait(HashType, FormatVersion)));
   // Set up our iterator for readNextRecord.
   RecordIterator = Index->data_begin();
 
@@ -357,21 +397,14 @@ std::error_code IndexedInstrProfReader::getFunctionCounts(
     return error(instrprof_error::unknown_function);
 
   // Found it. Look for counters with the right hash.
-  ArrayRef<uint64_t> Data = (*Iter).Data;
-  uint64_t NumCounts;
-  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
-    // The function hash comes first.
-    uint64_t FoundHash = Data[I++];
-    // In v1, we have at least one count. Later, we have the number of counts.
-    if (I == E)
-      return error(instrprof_error::malformed);
-    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
-    // If we have more counts than data, this is bogus.
-    if (I + NumCounts > E)
-      return error(instrprof_error::malformed);
+  ArrayRef<InstrProfRecord> Data = (*Iter);
+  if (Data.empty())
+    return error(instrprof_error::malformed);
+
+  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
     // Check for a match and fill the vector if there is one.
-    if (FoundHash == FuncHash) {
-      Counts = Data.slice(I, NumCounts);
+    if (Data[I].Hash == FuncHash) {
+      Counts = Data[I].Counts;
       return success();
     }
   }
@@ -384,30 +417,15 @@ IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
   if (RecordIterator == Index->data_end())
     return error(instrprof_error::eof);
 
-  // Record the current function name.
-  Record.Name = (*RecordIterator).Name;
-
-  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
-  // Valid data starts with a hash and either a count or the number of counts.
-  if (CurrentOffset + 1 > Data.size())
-    return error(instrprof_error::malformed);
-  // First we have a function hash.
-  Record.Hash = Data[CurrentOffset++];
-  // In version 1 we knew the number of counters implicitly, but in newer
-  // versions we store the number of counters next.
-  uint64_t NumCounts =
-      FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++];
-  if (CurrentOffset + NumCounts > Data.size())
+  if ((*RecordIterator).empty())
     return error(instrprof_error::malformed);
-  // And finally the counts themselves.
-  Record.Counts = Data.slice(CurrentOffset, NumCounts);
 
-  // If we've exhausted this function's data, increment the record.
-  CurrentOffset += NumCounts;
-  if (CurrentOffset == Data.size()) {
+  static unsigned RecordIndex = 0;
+  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
+  Record = Data[RecordIndex++];
+  if (RecordIndex >= Data.size()) {
     ++RecordIterator;
-    CurrentOffset = 0;
+    RecordIndex = 0;
   }
-
   return success();
 }
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index efac292..2188543 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -69,7 +69,7 @@ public:
     }
   }
 };
-} // namespace
+}
 
 std::error_code
 InstrProfWriter::addFunctionCounts(StringRef FunctionName,
diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp
index e2894c6..920c48a 100644
--- a/lib/ProfileData/SampleProf.cpp
+++ b/lib/ProfileData/SampleProf.cpp
@@ -42,7 +42,7 @@ class SampleProfErrorCategoryType : public std::error_category {
     llvm_unreachable("A value of sampleprof_error has no message.");
   }
 };
-} // namespace
+}
 
 static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
 
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 48830e8..4b0a0e5 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -90,7 +90,7 @@ namespace llvm {
   const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
   const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
                                                 / (351 * integerPartWidth));
-} // namespace llvm
+}
 
 /* A bunch of private, handy routines.  */
 
@@ -3539,7 +3539,7 @@ namespace {
     exp += FirstSignificant;
     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
   }
-} // namespace
+}
 
 void APFloat::toString(SmallVectorImpl<char> &Str,
                        unsigned FormatPrecision,
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index aa026d4..23f89bb 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2331,7 +2331,7 @@ namespace {
   {
     return findFirstSet(value, ZB_Max);
   }
-} // namespace
+}
 
 /* Sets the least significant part of a bignum to the input value, and
    zeroes out higher parts.  */
diff --git a/lib/Support/APSInt.cpp b/lib/Support/APSInt.cpp
index 73acafa..975457c 100644
--- a/lib/Support/APSInt.cpp
+++ b/lib/Support/APSInt.cpp
@@ -17,6 +17,25 @@
 
 using namespace llvm;
 
+APSInt::APSInt(StringRef Str) {
+  assert(!Str.empty() && "Invalid string length");
+
+  // (Over-)estimate the required number of bits.
+  unsigned NumBits = ((Str.size() * 64) / 19) + 2;
+  APInt Tmp(NumBits, Str, /*Radix=*/10);
+  if (Str[0] == '-') {
+    unsigned MinBits = Tmp.getMinSignedBits();
+    if (MinBits > 0 && MinBits < NumBits)
+      Tmp = Tmp.trunc(MinBits);
+    *this = APSInt(Tmp, /*IsUnsigned=*/false);
+    return;
+  }
+  unsigned ActiveBits = Tmp.getActiveBits();
+  if (ActiveBits > 0 && ActiveBits < NumBits)
+    Tmp = Tmp.trunc(ActiveBits);
+  *this = APSInt(Tmp, /*IsUnsigned=*/true);
+}
+
 void APSInt::Profile(FoldingSetNodeID& ID) const {
   ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0));
   APInt::Profile(ID);
diff --git a/lib/Support/ARMBuildAttrs.cpp b/lib/Support/ARMBuildAttrs.cpp
index 9c8bb15..960a0f1 100644
--- a/lib/Support/ARMBuildAttrs.cpp
+++ b/lib/Support/ARMBuildAttrs.cpp
@@ -66,7 +66,7 @@ const struct {
   { ARMBuildAttrs::ABI_align_needed, "Tag_ABI_align8_needed" },
   { ARMBuildAttrs::ABI_align_preserved, "Tag_ABI_align8_preserved" },
 };
-} // namespace
+}
 
 namespace llvm {
 namespace ARMBuildAttrs {
@@ -90,6 +90,6 @@ int AttrTypeFromString(StringRef Tag) {
       return ARMAttributeTags[TI].Attr;
   return -1;
 }
-} // namespace ARMBuildAttrs
-} // namespace llvm
+}
+}
 
diff --git a/lib/Support/ARMWinEH.cpp b/lib/Support/ARMWinEH.cpp
index 8d21ca5..03c150f 100644
--- a/lib/Support/ARMWinEH.cpp
+++ b/lib/Support/ARMWinEH.cpp
@@ -32,7 +32,7 @@ std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF) {
 
   return std::make_pair(GPRMask, VFPMask);
 }
-} // namespace WinEH
-} // namespace ARM
-} // namespace llvm
+}
+}
+}
 
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 021037a..f48edac 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -37,4 +37,4 @@ void PrintRecyclerStats(size_t Size,
          << "Number of elements free for recycling: " << FreeListSize << '\n';
 }
 
-} // namespace llvm
+}
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index eac189b..a8a4df5 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -1,7 +1,7 @@
 set(system_libs)
 if( NOT MSVC )
   if( MINGW )
-    set(system_libs ${system_libs} imagehlp psapi shell32 ole32)
+    set(system_libs ${system_libs} psapi shell32 ole32)
   elseif( CMAKE_HOST_UNIX )
     if( HAVE_LIBRT )
       set(system_libs ${system_libs} rt)
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 3638f0d..dcaacf6 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -61,8 +61,8 @@ TEMPLATE_INSTANTIATION(class opt<int>);
 TEMPLATE_INSTANTIATION(class opt<std::string>);
 TEMPLATE_INSTANTIATION(class opt<char>);
 TEMPLATE_INSTANTIATION(class opt<bool>);
-} // namespace cl
-} // namespace llvm
+}
+} // end namespace llvm::cl
 
 // Pin the vtables to this file.
 void GenericOptionValue::anchor() {}
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 929f5da..aba0f1d 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -60,7 +60,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 static ManagedStatic<sys::Mutex> gCrashRecoveryContextMutex;
 static bool gCrashRecoveryEnabled = false;
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index 0f44780..f1a334b 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -175,7 +175,7 @@ public:
       : DDAI(DDAI), Required(Required) {}
 };
 
-} // namespace
+}
 
 DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
     DAGDeltaAlgorithm &DDA, const changeset_ty &Changes,
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index ad05494..3b10fc5 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -72,7 +72,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 std::unique_ptr<DataStreamer>
 llvm::getDataFileStreamer(const std::string &Filename, std::string *StrError) {
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 2052662..47751fc 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -99,7 +99,7 @@ struct DebugOnlyOpt {
   }
 };
 
-} // namespace
+}
 
 static DebugOnlyOpt DebugOnlyOptLoc;
 
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 6229825..13a4155 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -190,6 +190,9 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
   case DW_AT_APPLE_property_attribute:   return "DW_AT_APPLE_property_attribute";
   case DW_AT_APPLE_property:             return "DW_AT_APPLE_property";
   case DW_AT_APPLE_objc_complete_type:   return "DW_AT_APPLE_objc_complete_type";
+  case DW_AT_LLVM_include_path:          return "DW_AT_LLVM_include_path";
+  case DW_AT_LLVM_config_macros:         return "DW_AT_LLVM_config_macros";
+  case DW_AT_LLVM_isysroot:              return "DW_AT_LLVM_isysroot";
 
     // DWARF5 Fission Extension Attribute
   case DW_AT_GNU_dwo_name:               return "DW_AT_GNU_dwo_name";
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 6f064c9..307ff09 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -109,4 +109,4 @@ std::error_code FileOutputBuffer::commit() {
   // Rename file to final name.
   return sys::fs::rename(Twine(TempPath), Twine(FinalPath));
 }
-} // namespace llvm
+} // namespace
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 97aedc8..a9b0220 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -135,12 +135,12 @@ static const char *getProgramName(GraphProgram::Name program) {
 bool llvm::DisplayGraph(StringRef FilenameRef, bool wait,
                         GraphProgram::Name program) {
   std::string Filename = FilenameRef;
-  wait &= !ViewBackground;
   std::string ErrMsg;
   std::string ViewerPath;
   GraphSession S;
 
 #ifdef __APPLE__
+  wait &= !ViewBackground;
   if (S.TryFindProgram("open", ViewerPath)) {
     std::vector<const char *> args;
     args.push_back(ViewerPath.c_str());
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index d07c5f0..fb81d60 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -12,6 +12,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
 #include <sys/stat.h>
 #include <sys/types.h>
 #if LLVM_ON_WIN32
@@ -20,6 +21,16 @@
 #if LLVM_ON_UNIX
 #include <unistd.h>
 #endif
+
+#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && (__MAC_OS_X_VERSION_MIN_REQUIRED > 1050)
+#define USE_OSX_GETHOSTUUID 1
+#else
+#define USE_OSX_GETHOSTUUID 0
+#endif
+
+#if USE_OSX_GETHOSTUUID
+#include <uuid/uuid.h>
+#endif
 using namespace llvm;
 
 /// \brief Attempt to read the lock file with the given name, if it exists.
@@ -55,20 +66,80 @@ LockFileManager::readLockFile(StringRef LockFileName) {
   return None;
 }
 
-bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) {
+static std::error_code getHostID(SmallVectorImpl<char> &HostID) {
+  HostID.clear();
+
+#if USE_OSX_GETHOSTUUID
+  // On OS X, use the more stable hardware UUID instead of hostname.
+  struct timespec wait = {1, 0}; // 1 second.
+  uuid_t uuid;
+  if (gethostuuid(uuid, &wait) != 0)
+    return std::error_code(errno, std::system_category());
+
+  uuid_string_t UUIDStr;
+  uuid_unparse(uuid, UUIDStr);
+  StringRef UUIDRef(UUIDStr);
+  HostID.append(UUIDRef.begin(), UUIDRef.end());
+
+#elif LLVM_ON_UNIX
+  char HostName[256];
+  HostName[255] = 0;
+  HostName[0] = 0;
+  gethostname(HostName, 255);
+  StringRef HostNameRef(HostName);
+  HostID.append(HostNameRef.begin(), HostNameRef.end());
+
+#else
+  StringRef Dummy("localhost");
+  HostID.append(Dummy.begin(), Dummy.end());
+#endif
+
+  return std::error_code();
+}
+
+bool LockFileManager::processStillExecuting(StringRef HostID, int PID) {
 #if LLVM_ON_UNIX && !defined(__ANDROID__)
-  char MyHostname[256];
-  MyHostname[255] = 0;
-  MyHostname[0] = 0;
-  gethostname(MyHostname, 255);
+  SmallString<256> StoredHostID;
+  if (getHostID(StoredHostID))
+    return true; // Conservatively assume it's executing on error.
+
   // Check whether the process is dead. If so, we're done.
-  if (MyHostname == Hostname && getsid(PID) == -1 && errno == ESRCH)
+  if (StoredHostID == HostID && getsid(PID) == -1 && errno == ESRCH)
     return false;
 #endif
 
   return true;
 }
 
+namespace {
+/// An RAII helper object ensure that the unique lock file is removed.
+///
+/// Ensures that if there is an error or a signal before we finish acquiring the
+/// lock, the unique file will be removed. And if we successfully take the lock,
+/// the signal handler is left in place so that signals while the lock is held
+/// will remove the unique lock file. The caller should ensure there is a
+/// matching call to sys::DontRemoveFileOnSignal when the lock is released.
+class RemoveUniqueLockFileOnSignal {
+  StringRef Filename;
+  bool RemoveImmediately;
+public:
+  RemoveUniqueLockFileOnSignal(StringRef Name)
+  : Filename(Name), RemoveImmediately(true) {
+    sys::RemoveFileOnSignal(Filename, nullptr);
+  }
+  ~RemoveUniqueLockFileOnSignal() {
+    if (!RemoveImmediately) {
+      // Leave the signal handler enabled. It will be removed when the lock is
+      // released.
+      return;
+    }
+    sys::fs::remove(Filename);
+    sys::DontRemoveFileOnSignal(Filename);
+  }
+  void lockAcquired() { RemoveImmediately = false; }
+};
+} // end anonymous namespace
+
 LockFileManager::LockFileManager(StringRef FileName)
 {
   this->FileName = FileName;
@@ -96,17 +167,18 @@ LockFileManager::LockFileManager(StringRef FileName)
 
   // Write our process ID to our unique lock file.
   {
-    raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true);
+    SmallString<256> HostID;
+    if (auto EC = getHostID(HostID)) {
+      Error = EC;
+      return;
+    }
 
+    raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true);
+    Out << HostID << ' ';
 #if LLVM_ON_UNIX
-    // FIXME: move getpid() call into LLVM
-    char hostname[256];
-    hostname[255] = 0;
-    hostname[0] = 0;
-    gethostname(hostname, 255);
-    Out << hostname << ' ' << getpid();
+    Out << getpid();
 #else
-    Out << "localhost 1";
+    Out << "1";
 #endif
     Out.close();
 
@@ -119,12 +191,18 @@ LockFileManager::LockFileManager(StringRef FileName)
     }
   }
 
+  // Clean up the unique file on signal, which also releases the lock if it is
+  // held since the .lock symlink will point to a nonexistent file.
+  RemoveUniqueLockFileOnSignal RemoveUniqueFile(UniqueLockFileName);
+
   while (1) {
     // Create a link from the lock file name. If this succeeds, we're done.
     std::error_code EC =
         sys::fs::create_link(UniqueLockFileName, LockFileName);
-    if (!EC)
+    if (!EC) {
+      RemoveUniqueFile.lockAcquired();
       return;
+    }
 
     if (EC != errc::file_exists) {
       Error = EC;
@@ -171,6 +249,9 @@ LockFileManager::~LockFileManager() {
   // Since we own the lock, remove the lock file and our own unique lock file.
   sys::fs::remove(LockFileName);
   sys::fs::remove(UniqueLockFileName);
+  // The unique file is now gone, so remove it from the signal handler. This
+  // matches a sys::RemoveFileOnSignal() in LockFileManager().
+  sys::DontRemoveFileOnSignal(UniqueLockFileName);
 }
 
 LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp
index 6ed81fb..ceab580 100644
--- a/lib/Support/MD5.cpp
+++ b/lib/Support/MD5.cpp
@@ -283,4 +283,4 @@ void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
     Res << format("%.2x", Result[i]);
 }
 
-} // namespace llvm
+}
diff --git a/lib/Support/MathExtras.cpp b/lib/Support/MathExtras.cpp
index 9265a43..ba09245 100644
--- a/lib/Support/MathExtras.cpp
+++ b/lib/Support/MathExtras.cpp
@@ -29,4 +29,4 @@ namespace llvm {
   const float huge_valf = HUGE_VALF;
 #endif
 
-} // namespace llvm
+}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 1d69b96..98862e9 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -94,7 +94,7 @@ public:
     return MemoryBuffer_Malloc;
   }
 };
-} // namespace
+}
 
 static ErrorOr<std::unique_ptr<MemoryBuffer>>
 getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize, 
@@ -220,7 +220,7 @@ public:
     return MemoryBuffer_MMap;
   }
 };
-} // namespace
+}
 
 static ErrorOr<std::unique_ptr<MemoryBuffer>>
 getMemoryBufferForStream(int FD, const Twine &BufferName) {
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 42867c9..c8d3844 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -110,7 +110,7 @@ MutexImpl::tryacquire()
   return errorcode == 0;
 }
 
-} // namespace llvm
+}
 
 #elif defined(LLVM_ON_UNIX)
 #include "Unix/Mutex.inc"
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index 21ba5a4..3b6309c 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -113,7 +113,7 @@ RWMutexImpl::writer_release()
   return errorcode == 0;
 }
 
-} // namespace llvm
+}
 
 #elif defined(LLVM_ON_UNIX)
 #include "Unix/RWMutex.inc"
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 90f5fdb..56c3b0f 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -60,7 +60,7 @@ public:
     Stats.push_back(S);
   }
 };
-} // namespace
+}
 
 static ManagedStatic<StatisticInfo> StatInfo;
 static ManagedStatic<sys::SmartMutex<true> > StatLock;
diff --git a/lib/Support/StreamingMemoryObject.cpp b/lib/Support/StreamingMemoryObject.cpp
index 891aa66..5a44e62 100644
--- a/lib/Support/StreamingMemoryObject.cpp
+++ b/lib/Support/StreamingMemoryObject.cpp
@@ -129,4 +129,4 @@ StreamingMemoryObject::StreamingMemoryObject(
       BytesSkipped(0), ObjectSize(0), EOFReached(false) {
   BytesRead = this->Streamer->GetBytes(&Bytes[0], kChunkSize);
 }
-} // namespace llvm
+}
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index 760cdc1..4d4c041 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -25,31 +25,37 @@ namespace {
 // List of canonical FPU names (use getFPUSynonym) and which architectural
 // features they correspond to (use getFPUFeatures).
 // FIXME: TableGen this.
+// The entries must appear in the order listed in ARM::FPUKind for correct indexing
 struct {
   const char * Name;
   ARM::FPUKind ID;
-  unsigned FPUVersion; ///< Corresponds directly to the FP arch version number.
+  ARM::FPUVersion FPUVersion;
   ARM::NeonSupportLevel NeonSupport;
   ARM::FPURestriction Restriction;
 } FPUNames[] = {
-  { "invalid",       ARM::FK_INVALID,       0, ARM::NS_None,   ARM::FR_None},
-  { "none",          ARM::FK_NONE,          0, ARM::NS_None,   ARM::FR_None},
-  { "vfp",           ARM::FK_VFP,           2, ARM::NS_None,   ARM::FR_None},
-  { "vfpv2",         ARM::FK_VFPV2,         2, ARM::NS_None,   ARM::FR_None},
-  { "vfpv3",         ARM::FK_VFPV3,         3, ARM::NS_None,   ARM::FR_None},
-  { "vfpv3-d16",     ARM::FK_VFPV3_D16,     3, ARM::NS_None,   ARM::FR_D16},
-  { "vfpv4",         ARM::FK_VFPV4,         4, ARM::NS_None,   ARM::FR_None},
-  { "vfpv4-d16",     ARM::FK_VFPV4_D16,     4, ARM::NS_None,   ARM::FR_D16},
-  { "fpv4-sp-d16",   ARM::FK_FPV4_SP_D16,   4, ARM::NS_None,   ARM::FR_SP_D16},
-  { "fpv5-d16",      ARM::FK_FPV5_D16,      5, ARM::NS_None,   ARM::FR_D16},
-  { "fpv5-sp-d16",   ARM::FK_FPV5_SP_D16,   5, ARM::NS_None,   ARM::FR_SP_D16},
-  { "fp-armv8",      ARM::FK_FP_ARMV8,      5, ARM::NS_None,   ARM::FR_None},
-  { "neon",          ARM::FK_NEON,          3, ARM::NS_Neon,   ARM::FR_None},
-  { "neon-vfpv4",    ARM::FK_NEON_VFPV4,    4, ARM::NS_Neon,   ARM::FR_None},
-  { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, 5, ARM::NS_Neon,   ARM::FR_None},
+  { "invalid",        ARM::FK_INVALID,        ARM::FV_NONE,       ARM::NS_None,   ARM::FR_None},
+  { "none",           ARM::FK_NONE,           ARM::FV_NONE,       ARM::NS_None,   ARM::FR_None},
+  { "vfp",            ARM::FK_VFP,            ARM::FV_VFPV2,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv2",          ARM::FK_VFPV2,          ARM::FV_VFPV2,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv3",          ARM::FK_VFPV3,          ARM::FV_VFPV3,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv3-fp16",     ARM::FK_VFPV3_FP16,     ARM::FV_VFPV3_FP16, ARM::NS_None,   ARM::FR_None},
+  { "vfpv3-d16",      ARM::FK_VFPV3_D16,      ARM::FV_VFPV3,      ARM::NS_None,   ARM::FR_D16},
+  { "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None,   ARM::FR_D16},
+  { "vfpv3xd",        ARM::FK_VFPV3XD,        ARM::FV_VFPV3,      ARM::NS_None,   ARM::FR_SP_D16},
+  { "vfpv3xd-fp16",   ARM::FK_VFPV3XD_FP16,   ARM::FV_VFPV3_FP16, ARM::NS_None,   ARM::FR_SP_D16},
+  { "vfpv4",          ARM::FK_VFPV4,          ARM::FV_VFPV4,      ARM::NS_None,   ARM::FR_None},
+  { "vfpv4-d16",      ARM::FK_VFPV4_D16,      ARM::FV_VFPV4,      ARM::NS_None,   ARM::FR_D16},
+  { "fpv4-sp-d16",    ARM::FK_FPV4_SP_D16,    ARM::FV_VFPV4,      ARM::NS_None,   ARM::FR_SP_D16},
+  { "fpv5-d16",       ARM::FK_FPV5_D16,       ARM::FV_VFPV5,      ARM::NS_None,   ARM::FR_D16},
+  { "fpv5-sp-d16",    ARM::FK_FPV5_SP_D16,    ARM::FV_VFPV5,      ARM::NS_None,   ARM::FR_SP_D16},
+  { "fp-armv8",       ARM::FK_FP_ARMV8,       ARM::FV_VFPV5,      ARM::NS_None,   ARM::FR_None},
+  { "neon",           ARM::FK_NEON,           ARM::FV_VFPV3,      ARM::NS_Neon,   ARM::FR_None},
+  { "neon-fp16",      ARM::FK_NEON_FP16,      ARM::FV_VFPV3_FP16, ARM::NS_Neon,   ARM::FR_None},
+  { "neon-vfpv4",     ARM::FK_NEON_VFPV4,     ARM::FV_VFPV4,      ARM::NS_Neon,   ARM::FR_None},
+  { "neon-fp-armv8",  ARM::FK_NEON_FP_ARMV8,  ARM::FV_VFPV5,      ARM::NS_Neon,   ARM::FR_None},
   { "crypto-neon-fp-armv8",
-              ARM::FK_CRYPTO_NEON_FP_ARMV8, 5, ARM::NS_Crypto, ARM::FR_None},
-  { "softvfp",       ARM::FK_SOFTVFP,       0, ARM::NS_None,   ARM::FR_None},
+               ARM::FK_CRYPTO_NEON_FP_ARMV8,  ARM::FV_VFPV5,      ARM::NS_Crypto, ARM::FR_None},
+  { "softvfp",        ARM::FK_SOFTVFP,        ARM::FV_NONE,       ARM::NS_None,   ARM::FR_None},
 };
 
 // List of canonical arch names (use getArchSynonym).
@@ -279,27 +285,33 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind,
   // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
   // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
   switch (FPUNames[FPUKind].FPUVersion) {
-  case 5:
+  case ARM::FV_VFPV5:
     Features.push_back("+fp-armv8");
     break;
-  case 4:
+  case ARM::FV_VFPV4:
     Features.push_back("+vfp4");
     Features.push_back("-fp-armv8");
     break;
-  case 3:
+  case ARM::FV_VFPV3_FP16:
+    Features.push_back("+vfp3");
+    Features.push_back("+fp16");
+    Features.push_back("-vfp4");
+    Features.push_back("-fp-armv8");
+    break;
+  case ARM::FV_VFPV3:
     Features.push_back("+vfp3");
     Features.push_back("-fp16");
     Features.push_back("-vfp4");
     Features.push_back("-fp-armv8");
     break;
-  case 2:
+  case ARM::FV_VFPV2:
     Features.push_back("+vfp2");
     Features.push_back("-vfp3");
     Features.push_back("-fp16");
     Features.push_back("-vfp4");
     Features.push_back("-fp-armv8");
     break;
-  case 0:
+  case ARM::FV_NONE:
     Features.push_back("-vfp2");
     Features.push_back("-vfp3");
     Features.push_back("-fp16");
diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp
index caa5b5a..136b93e 100644
--- a/lib/Support/TimeValue.cpp
+++ b/lib/Support/TimeValue.cpp
@@ -45,7 +45,7 @@ TimeValue::normalize( void ) {
   }
 }
 
-} // namespace llvm
+}
 
 /// Include the platform-specific portion of TimeValue class
 #ifdef LLVM_ON_UNIX
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 0ad253b..d7b6515 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -50,7 +50,7 @@ namespace {
   InfoOutputFilename("info-output-file", cl::value_desc("filename"),
                      cl::desc("File to append -stats and -timer output to"),
                    cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
-} // namespace
+}
 
 // CreateInfoOutputFile - Return a file stream to print our output on.
 raw_ostream *llvm::CreateInfoOutputFile() {
@@ -218,7 +218,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 static ManagedStatic<Name2TimerMap> NamedTimers;
 static ManagedStatic<Name2PairMap> NamedGroupedTimers;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 072d4a0..92be0e0 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -60,6 +60,8 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case spir64:      return "spir64";
   case kalimba:     return "kalimba";
   case shave:       return "shave";
+  case wasm32:      return "wasm32";
+  case wasm64:      return "wasm64";
   }
 
   llvm_unreachable("Invalid ArchType!");
@@ -122,6 +124,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
   case spir64:      return "spir";
   case kalimba:     return "kalimba";
   case shave:       return "shave";
+  case wasm32:      return "wasm32";
+  case wasm64:      return "wasm64";
   }
 }
 
@@ -255,6 +259,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("spir64", spir64)
     .Case("kalimba", kalimba)
     .Case("shave", shave)
+    .Case("wasm32", wasm32)
+    .Case("wasm64", wasm64)
     .Default(UnknownArch);
 }
 
@@ -360,6 +366,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("spir64", Triple::spir64)
     .StartsWith("kalimba", Triple::kalimba)
     .Case("shave", Triple::shave)
+    .Case("wasm32", Triple::wasm32)
+    .Case("wasm64", Triple::wasm64)
     .Default(Triple::UnknownArch);
 }
 
@@ -1009,6 +1017,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::spir:
   case llvm::Triple::kalimba:
   case llvm::Triple::shave:
+  case llvm::Triple::wasm32:
     return 32;
 
   case llvm::Triple::aarch64:
@@ -1028,6 +1037,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::amdil64:
   case llvm::Triple::hsail64:
   case llvm::Triple::spir64:
+  case llvm::Triple::wasm64:
     return 64;
   }
   llvm_unreachable("Invalid architecture value");
@@ -1081,6 +1091,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::x86:
   case Triple::xcore:
   case Triple::shave:
+  case Triple::wasm32:
     // Already 32-bit.
     break;
 
@@ -1094,6 +1105,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::amdil64:   T.setArch(Triple::amdil);   break;
   case Triple::hsail64:   T.setArch(Triple::hsail);   break;
   case Triple::spir64:    T.setArch(Triple::spir);    break;
+  case Triple::wasm64:    T.setArch(Triple::wasm32);  break;
   }
   return T;
 }
@@ -1134,6 +1146,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::sparcv9:
   case Triple::systemz:
   case Triple::x86_64:
+  case Triple::wasm64:
     // Already 64-bit.
     break;
 
@@ -1147,6 +1160,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::amdil:   T.setArch(Triple::amdil64);   break;
   case Triple::hsail:   T.setArch(Triple::hsail64);   break;
   case Triple::spir:    T.setArch(Triple::spir64);    break;
+  case Triple::wasm32:  T.setArch(Triple::wasm64);    break;
   }
   return T;
 }
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index b15cedd..df13bd2 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -205,7 +205,7 @@ private:
   int &FD;
   bool KeepOpen;
 };
-} // namespace
+}
 
 std::error_code Process::FixupStandardFileDescriptors() {
   int NullFD = -1;
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index dc633ab..8947b62 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -176,7 +176,7 @@ static void SetMemoryLimits (unsigned size)
 #endif
 }
 
-} // namespace llvm
+}
 
 static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
                     const char **envp, const StringRef **redirects,
@@ -473,4 +473,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
   }
   return true;
 }
-} // namespace llvm
+}
diff --git a/lib/Support/Unix/ThreadLocal.inc b/lib/Support/Unix/ThreadLocal.inc
index a04dd3e..31c3f38 100644
--- a/lib/Support/Unix/ThreadLocal.inc
+++ b/lib/Support/Unix/ThreadLocal.inc
@@ -56,7 +56,7 @@ void ThreadLocalImpl::removeInstance() {
   setInstance(nullptr);
 }
 
-} // namespace llvm
+}
 #else
 namespace llvm {
 using namespace sys;
diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc
index 2c4f04c..042e0da 100644
--- a/lib/Support/Unix/TimeValue.inc
+++ b/lib/Support/Unix/TimeValue.inc
@@ -51,4 +51,4 @@ TimeValue TimeValue::now() {
       NANOSECONDS_PER_MICROSECOND ) );
 }
 
-} // namespace llvm
+}
diff --git a/lib/Support/Unix/Watchdog.inc b/lib/Support/Unix/Watchdog.inc
index 9e335aa..5d89c0e 100644
--- a/lib/Support/Unix/Watchdog.inc
+++ b/lib/Support/Unix/Watchdog.inc
@@ -28,5 +28,5 @@ namespace llvm {
       alarm(0);
 #endif
     }
-  } // namespace sys
-} // namespace llvm
+  }
+}
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 79d5f79..d38f197 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -23,14 +23,6 @@
  #include <ntverp.h>
 #endif
 
-#ifdef __MINGW32__
- #if (HAVE_LIBIMAGEHLP != 1)
-  #error "libimagehlp.a should be present"
- #endif
-#else
- #pragma comment(lib, "dbghelp.lib")
-#endif
-
 namespace llvm {
 using namespace sys;
 
@@ -39,10 +31,21 @@ using namespace sys;
 //===          and must not be UNIX code.
 //===----------------------------------------------------------------------===//
 
+typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID);
+static fpEnumerateLoadedModules fEnumerateLoadedModules;
 static DenseSet<HMODULE> *OpenedHandles;
 
+static bool loadDebugHelp(void) {
+  HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
+  if (hLib) {
+    fEnumerateLoadedModules = (fpEnumerateLoadedModules)
+      ::GetProcAddress(hLib, "EnumerateLoadedModules64");
+  }
+  return fEnumerateLoadedModules != 0;
+}
+
 static BOOL CALLBACK
-ELM_Callback(WIN32_ELMCB_PCSTR ModuleName, ULONG_PTR ModuleBase,
+ELM_Callback(WIN32_ELMCB_PCSTR ModuleName, DWORD64 ModuleBase,
              ULONG ModuleSize, PVOID UserContext) {
   OpenedHandles->insert((HMODULE)ModuleBase);
   return TRUE;
@@ -57,7 +60,14 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
     if (OpenedHandles == 0)
       OpenedHandles = new DenseSet<HMODULE>();
 
-    EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+    if (!fEnumerateLoadedModules) {
+      if (!loadDebugHelp()) {
+        assert(false && "These APIs should always be available");
+        return DynamicLibrary();
+      }
+    }
+
+    fEnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
     // Dummy library that represents "search all handles".
     // This is mostly to ensure that the return value still shows up as "valid".
     return DynamicLibrary(&OpenedHandles);
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 6006499..5c8c239 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -31,10 +31,9 @@
 
 #ifdef _MSC_VER
  #pragma comment(lib, "psapi.lib")
- #pragma comment(lib, "dbghelp.lib")
 #elif __MINGW32__
- #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
-  #error "libimagehlp.a & libpsapi.a should be present"
+ #if (HAVE_LIBPSAPI != 1)
+  #error "libpsapi.a should be present"
  #endif
  // The version of g++ that comes with MinGW does *not* properly understand
  // the ll format specifier for printf. However, MinGW passes the format
@@ -103,6 +102,8 @@
    DWORD64     Reserved[3];
    KDHELP64    KdHelp;
  } STACKFRAME64, *LPSTACKFRAME64;
+ #endif // !defined(__MINGW64_VERSION_MAJOR)
+#endif // __MINGW32__
 
 typedef BOOL (__stdcall *PREAD_PROCESS_MEMORY_ROUTINE64)(HANDLE hProcess,
                       DWORD64 qwBaseAddress, PVOID lpBuffer, DWORD nSize,
@@ -122,40 +123,46 @@ typedef BOOL (WINAPI *fpStackWalk64)(DWORD, HANDLE, HANDLE, LPSTACKFRAME64,
                       PFUNCTION_TABLE_ACCESS_ROUTINE64,
                       PGET_MODULE_BASE_ROUTINE64,
                       PTRANSLATE_ADDRESS_ROUTINE64);
-static fpStackWalk64 StackWalk64;
+static fpStackWalk64 fStackWalk64;
 
 typedef DWORD64 (WINAPI *fpSymGetModuleBase64)(HANDLE, DWORD64);
-static fpSymGetModuleBase64 SymGetModuleBase64;
+static fpSymGetModuleBase64 fSymGetModuleBase64;
 
 typedef BOOL (WINAPI *fpSymGetSymFromAddr64)(HANDLE, DWORD64,
                       PDWORD64, PIMAGEHLP_SYMBOL64);
-static fpSymGetSymFromAddr64 SymGetSymFromAddr64;
+static fpSymGetSymFromAddr64 fSymGetSymFromAddr64;
 
 typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64,
                       PDWORD, PIMAGEHLP_LINE64);
-static fpSymGetLineFromAddr64 SymGetLineFromAddr64;
+static fpSymGetLineFromAddr64 fSymGetLineFromAddr64;
 
 typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64);
-static fpSymFunctionTableAccess64 SymFunctionTableAccess64;
+static fpSymFunctionTableAccess64 fSymFunctionTableAccess64;
+
+typedef DWORD (WINAPI *fpSymSetOptions)(DWORD);
+static fpSymSetOptions fSymSetOptions;
+
+typedef BOOL (WINAPI *fpSymInitialize)(HANDLE, PCSTR, BOOL);
+static fpSymInitialize fSymInitialize;
 
 static bool load64BitDebugHelp(void) {
   HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
   if (hLib) {
-    StackWalk64 = (fpStackWalk64)
+    fStackWalk64 = (fpStackWalk64)
                       ::GetProcAddress(hLib, "StackWalk64");
-    SymGetModuleBase64 = (fpSymGetModuleBase64)
+    fSymGetModuleBase64 = (fpSymGetModuleBase64)
                       ::GetProcAddress(hLib, "SymGetModuleBase64");
-    SymGetSymFromAddr64 = (fpSymGetSymFromAddr64)
+    fSymGetSymFromAddr64 = (fpSymGetSymFromAddr64)
                       ::GetProcAddress(hLib, "SymGetSymFromAddr64");
-    SymGetLineFromAddr64 = (fpSymGetLineFromAddr64)
+    fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64)
                       ::GetProcAddress(hLib, "SymGetLineFromAddr64");
-    SymFunctionTableAccess64 = (fpSymFunctionTableAccess64)
+    fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64)
                      ::GetProcAddress(hLib, "SymFunctionTableAccess64");
+    fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions");
+    fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize");
   }
-  return StackWalk64 != NULL;
+  return fStackWalk64 && fSymInitialize && fSymSetOptions;
 }
- #endif // !defined(__MINGW64_VERSION_MAJOR)
-#endif // __MINGW32__
 
 // Forward declare.
 static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
@@ -187,12 +194,12 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
 #endif
 
   // Initialize the symbol handler.
-  SymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES);
-  SymInitialize(hProcess, NULL, TRUE);
+  fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES);
+  fSymInitialize(hProcess, NULL, TRUE);
 
   while (true) {
-    if (!StackWalk64(machineType, hProcess, hThread, &StackFrame, Context, NULL,
-                     SymFunctionTableAccess64, SymGetModuleBase64, NULL)) {
+    if (!fStackWalk64(machineType, hProcess, hThread, &StackFrame, Context, 0,
+                      fSymFunctionTableAccess64, fSymGetModuleBase64, 0)) {
       break;
     }
 
@@ -221,7 +228,7 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
             static_cast<DWORD>(StackFrame.Params[3]));
 #endif
     // Verify the PC belongs to a module in this process.
-    if (!SymGetModuleBase64(hProcess, PC)) {
+    if (!fSymGetModuleBase64(hProcess, PC)) {
       OS << " <unknown module>\n";
       continue;
     }
@@ -234,7 +241,7 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
     symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64);
 
     DWORD64 dwDisp;
-    if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
+    if (!fSymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
       OS << '\n';
       continue;
     }
@@ -250,7 +257,7 @@ static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
     IMAGEHLP_LINE64 line = {};
     DWORD dwLineDisp;
     line.SizeOfStruct = sizeof(line);
-    if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
+    if (fSymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
       OS << format(", %s, line %lu", line.FileName, line.LineNumber);
       if (dwLineDisp > 0)
         OS << format(" + 0x%lX byte(s)", dwLineDisp);
@@ -301,17 +308,13 @@ static void InitializeThreading() {
 }
 
 static void RegisterHandler() {
-#if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR)
-  // On MinGW.org, we need to load up the symbols explicitly, because the
-  // Win32 framework they include does not have support for the 64-bit
-  // versions of the APIs we need.  If we cannot load up the APIs (which
-  // would be unexpected as they should exist on every version of Windows
-  // we support), we will bail out since there would be nothing to report.
+  // If we cannot load up the APIs (which would be unexpected as they should
+  // exist on every version of Windows we support), we will bail out since
+  // there would be nothing to report.
   if (!load64BitDebugHelp()) {
     assert(false && "These APIs should always be available");
     return;
   }
-#endif
 
   if (RegisteredUnhandledExceptionFilter) {
     EnterCriticalSection(&CriticalSection);
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 5ca28a0..d55da5e 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -144,8 +144,8 @@ struct Token : ilist_node<Token> {
 
   Token() : Kind(TK_Error) {}
 };
-} // namespace yaml
-} // namespace llvm
+}
+}
 
 namespace llvm {
 template<>
@@ -178,7 +178,7 @@ struct ilist_node_traits<Token> {
 
   BumpPtrAllocator Alloc;
 };
-} // namespace llvm
+}
 
 typedef ilist<Token> TokenQueueT;
 
@@ -203,7 +203,7 @@ struct SimpleKey {
     return Tok == Other.Tok;
   }
 };
-} // namespace
+}
 
 /// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
 ///        subsequence and the subsequence's length in code units (uint8_t).
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 97e796c..6e982bf 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -1574,13 +1574,9 @@ void Record::checkName() {
 }
 
 DefInit *Record::getDefInit() {
-  static DenseMap<Record *, std::unique_ptr<DefInit>> ThePool;
-  if (TheInit)
-    return TheInit;
-
-  std::unique_ptr<DefInit> &I = ThePool[this];
-  if (!I) I.reset(new DefInit(this, new RecordRecTy(this)));
-  return I.get();
+  if (!TheInit)
+    TheInit.reset(new DefInit(this, new RecordRecTy(this)));
+  return TheInit.get();
 }
 
 const std::string &Record::getName() const {
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index d97d1ca..cbc30be 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -60,7 +60,7 @@ namespace tgtok {
     // String valued tokens.
     Id, StrVal, VarName, CodeFragment
   };
-} // namespace tgtok
+}
 
 /// TGLexer - TableGen Lexer class.
 class TGLexer {
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 6c5a083..bffd9e6 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -148,7 +148,7 @@ private:
   Color getColor(unsigned Register);
   Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
 };
-} // namespace
+}
 
 char AArch64A57FPLoadBalancing::ID = 0;
 
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index 176403c..d973234 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -102,7 +102,7 @@ public:
   }
 };
 char AArch64BranchRelaxation::ID = 0;
-} // namespace
+}
 
 /// verify - check BBOffsets, BBSizes, alignment of islands
 void AArch64BranchRelaxation::verify() {
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index efc328a..1e2d1c3 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -136,6 +136,6 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
 }
 
-} // namespace
+}
 
 #endif
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 11eefc4..06ff9af 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -135,7 +135,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
-} // namespace
+}
 
 char LDTLSCleanup::ID = 0;
 FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index acb3525..c2470f7 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -43,7 +43,7 @@ private:
                     unsigned BitSize);
 };
 char AArch64ExpandPseudo::ID = 0;
-} // namespace
+}
 
 /// \brief Transfer implicit operands on the pseudo instruction to the
 /// instructions created from the expansion.
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index d1523e8..c19fcdc 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -36,6 +36,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
@@ -1678,7 +1679,7 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 
 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                    bool WantZExt, MachineMemOperand *MMO) {
-  if(!TLI.allowsMisalignedMemoryAccesses(VT))
+  if (!TLI.allowsMisalignedMemoryAccesses(VT))
     return 0;
 
   // Simplify this down to something we can handle.
@@ -1965,7 +1966,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
 
 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
                                 MachineMemOperand *MMO) {
-  if(!TLI.allowsMisalignedMemoryAccesses(VT))
+  if (!TLI.allowsMisalignedMemoryAccesses(VT))
     return false;
 
   // Simplify this down to something we can handle.
@@ -3070,9 +3071,9 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   bool IsTailCall     = CLI.IsTailCall;
   bool IsVarArg       = CLI.IsVarArg;
   const Value *Callee = CLI.Callee;
-  const char *SymName = CLI.SymName;
+  MCSymbol *Symbol = CLI.Symbol;
 
-  if (!Callee && !SymName)
+  if (!Callee && !Symbol)
     return false;
 
   // Allow SelectionDAG isel to handle tail calls.
@@ -3134,8 +3135,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if (CM == CodeModel::Small) {
     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
-    if (SymName)
-      MIB.addExternalSymbol(SymName, 0);
+    if (Symbol)
+      MIB.addSym(Symbol, 0);
     else if (Addr.getGlobalValue())
       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
     else if (Addr.getReg()) {
@@ -3145,18 +3146,18 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
       return false;
   } else {
     unsigned CallReg = 0;
-    if (SymName) {
+    if (Symbol) {
       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
               ADRPReg)
-        .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+          .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 
       CallReg = createResultReg(&AArch64::GPR64RegClass);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
-              CallReg)
-        .addReg(ADRPReg)
-        .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
-                           AArch64II::MO_NC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::LDRXui), CallReg)
+          .addReg(ADRPReg)
+          .addSym(Symbol,
+                  AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     } else if (Addr.getGlobalValue())
       CallReg = materializeGV(Addr.getGlobalValue());
     else if (Addr.getReg())
@@ -3460,7 +3461,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     }
 
     CallLoweringInfo CLI;
-    CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
+    MCContext &Ctx = MF->getContext();
+    CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
                   TLI.getLibcallName(LC), std::move(Args));
     if (!lowerCallTo(CLI))
       return false;
@@ -4734,7 +4736,8 @@ bool AArch64FastISel::selectFRem(const Instruction *I) {
   }
 
   CallLoweringInfo CLI;
-  CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
+  MCContext &Ctx = MF->getContext();
+  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                 TLI.getLibcallName(LC), std::move(Args));
   if (!lowerCallTo(CLI))
     return false;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 11227ee..b496fcc 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -63,6 +63,6 @@ public:
                                             RegScavenger *RS) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0165ef9..f3242cd 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1777,8 +1777,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
   if (N->getOpcode() != ISD::BUILD_VECTOR)
     return false;
 
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    SDNode *Elt = N->getOperand(i).getNode();
+  for (const SDValue &Elt : N->op_values()) {
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
       unsigned HalfSize = EltSize / 2;
@@ -6689,6 +6688,160 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
   return NumBits == 32 || NumBits == 64;
 }
 
+/// \brief Lower an interleaved load into a ldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+///        %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
+///        %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
+///
+///      Into:
+///        %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
+///        %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
+///        %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
+bool AArch64TargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  const DataLayout *DL = getDataLayout();
+
+  VectorType *VecTy = Shuffles[0]->getType();
+  unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+
+  // Skip illegal vector types.
+  if (VecSize != 64 && VecSize != 128)
+    return false;
+
+  // A pointer vector can not be the return type of the ldN intrinsics. Need to
+  // load integer vectors first and then convert to pointer vectors.
+  Type *EltTy = VecTy->getVectorElementType();
+  if (EltTy->isPointerTy())
+    VecTy = VectorType::get(DL->getIntPtrType(EltTy),
+                            VecTy->getVectorNumElements());
+
+  Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
+  Type *Tys[2] = {VecTy, PtrTy};
+  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
+                                            Intrinsic::aarch64_neon_ld3,
+                                            Intrinsic::aarch64_neon_ld4};
+  Function *LdNFunc =
+      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+
+  IRBuilder<> Builder(LI);
+  Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+
+  CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
+
+  // Replace uses of each shufflevector with the corresponding vector loaded
+  // by ldN.
+  for (unsigned i = 0; i < Shuffles.size(); i++) {
+    ShuffleVectorInst *SVI = Shuffles[i];
+    unsigned Index = Indices[i];
+
+    Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+
+    // Convert the integer vector to pointer vector if the element is pointer.
+    if (EltTy->isPointerTy())
+      SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
+
+    SVI->replaceAllUsesWith(SubVec);
+  }
+
+  return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+                                   unsigned NumElts) {
+  SmallVector<Constant *, 16> Mask;
+  for (unsigned i = 0; i < NumElts; i++)
+    Mask.push_back(Builder.getInt32(Start + i));
+
+  return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a stN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+///        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+///                                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+///        store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+///      Into:
+///        %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+///        %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+///        %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+///        call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// st3 instruction in CodeGen.
+bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
+                                                  ShuffleVectorInst *SVI,
+                                                  unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+
+  VectorType *VecTy = SVI->getType();
+  assert(VecTy->getVectorNumElements() % Factor == 0 &&
+         "Invalid interleaved store");
+
+  unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+  Type *EltTy = VecTy->getVectorElementType();
+  VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+  const DataLayout *DL = getDataLayout();
+  unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+
+  // Skip illegal vector types.
+  if (SubVecSize != 64 && SubVecSize != 128)
+    return false;
+
+  Value *Op0 = SVI->getOperand(0);
+  Value *Op1 = SVI->getOperand(1);
+  IRBuilder<> Builder(SI);
+
+  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+  // vectors to integer vectors.
+  if (EltTy->isPointerTy()) {
+    Type *IntTy = DL->getIntPtrType(EltTy);
+    unsigned NumOpElts =
+        dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
+
+    // Convert to the corresponding integer vector.
+    Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
+    Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+    Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+    SubVecTy = VectorType::get(IntTy, NumSubElts);
+  }
+
+  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
+  Type *Tys[2] = {SubVecTy, PtrTy};
+  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
+                                             Intrinsic::aarch64_neon_st3,
+                                             Intrinsic::aarch64_neon_st4};
+  Function *StNFunc =
+      Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+  SmallVector<Value *, 5> Ops;
+
+  // Split the shufflevector operands into sub vectors for the new stN call.
+  for (unsigned i = 0; i < Factor; i++)
+    Ops.push_back(Builder.CreateShuffleVector(
+        Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
+  Builder.CreateCall(StNFunc, Ops);
+  return true;
+}
+
 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
                        unsigned AlignCheck) {
   return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index da42376..46298c0 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -305,6 +305,15 @@ public:
                      unsigned &RequiredAligment) const override;
   bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
 
+  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+
+  bool lowerInterleavedLoad(LoadInst *LI,
+                            ArrayRef<ShuffleVectorInst *> Shuffles,
+                            ArrayRef<unsigned> Indices,
+                            unsigned Factor) const override;
+  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+                             unsigned Factor) const override;
+
   bool isLegalAddImmediate(int64_t) const override;
   bool isLegalICmpImmediate(int64_t) const override;
 
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 2c52f34..3f2e772 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -614,10 +614,15 @@ def move_vec_shift : Operand<i32> {
   let ParserMatchClass = MoveVecShifterOperand;
 }
 
-def AddSubImmOperand : AsmOperandClass {
-  let Name = "AddSubImm";
-  let ParserMethod = "tryParseAddSubImm";
-  let DiagnosticType = "AddSubSecondSource";
+let DiagnosticType = "AddSubSecondSource" in {
+  def AddSubImmOperand : AsmOperandClass {
+    let Name = "AddSubImm";
+    let ParserMethod = "tryParseAddSubImm";
+  }
+  def AddSubImmNegOperand : AsmOperandClass {
+    let Name = "AddSubImmNeg";
+    let ParserMethod = "tryParseAddSubImm";
+  }
 }
 // An ADD/SUB immediate shifter operand:
 //  second operand:
@@ -631,8 +636,17 @@ class addsub_shifted_imm<ValueType Ty>
   let MIOperandInfo = (ops i32imm, i32imm);
 }
 
+class addsub_shifted_imm_neg<ValueType Ty>
+    : Operand<Ty> {
+  let EncoderMethod = "getAddSubImmOpValue";
+  let ParserMatchClass = AddSubImmNegOperand;
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
+
 def addsub_shifted_imm32 : addsub_shifted_imm<i32>;
 def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
+def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>;
+def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>;
 
 class neg_addsub_shifted_imm<ValueType Ty>
     : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
@@ -1633,7 +1647,7 @@ class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
                 (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2,
                       shiftExt)>;
 
-multiclass AddSub<bit isSub, string mnemonic,
+multiclass AddSub<bit isSub, string mnemonic, string alias,
                   SDPatternOperator OpNode = null_frag> {
   let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
   // Add/Subtract immediate
@@ -1686,6 +1700,14 @@ multiclass AddSub<bit isSub, string mnemonic,
     let Inst{31} = 1;
   }
 
+  // add Rd, Rb, -imm -> sub Rd, Rn, imm
+  def : InstAlias<alias#" $Rd, $Rn, $imm",
+                  (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
+                      addsub_shifted_imm32_neg:$imm), 0>;
+  def : InstAlias<alias#" $Rd, $Rn, $imm",
+                  (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
+                       addsub_shifted_imm64_neg:$imm), 0>;
+
   // Register/register aliases with no shift when SP is not used.
   def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
                        GPR32, GPR32, GPR32, 0>;
@@ -1706,7 +1728,8 @@ multiclass AddSub<bit isSub, string mnemonic,
                        GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0
 }
 
-multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
+multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
+                   string alias, string cmpAlias> {
   let isCompare = 1, Defs = [NZCV] in {
   // Add/Subtract immediate
   def Wri  : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
@@ -1752,6 +1775,14 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
   }
   } // Defs = [NZCV]
 
+  // Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
+  def : InstAlias<alias#" $Rd, $Rn, $imm",
+                  (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
+                      addsub_shifted_imm32_neg:$imm), 0>;
+  def : InstAlias<alias#" $Rd, $Rn, $imm",
+                  (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
+                       addsub_shifted_imm64_neg:$imm), 0>;
+
   // Compare aliases
   def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
                   WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>;
@@ -1768,6 +1799,12 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
   def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
                   XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
 
+  // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
+  def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
+                  WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
+  def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Xri")
+                  XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
+
   // Compare shorthands
   def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrs")
                   WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8d8864c..c0b3f2c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -96,15 +96,10 @@ bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
     return false;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return false;
-    --I;
-  }
+
   if (!isUnpredicatedTerminator(I))
     return false;
 
@@ -224,15 +219,10 @@ bool AArch64InstrInfo::ReverseBranchCondition(
 }
 
 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
     return 0;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return 0;
-    --I;
-  }
+
   if (!isUncondBranchOpcode(I->getOpcode()) &&
       !isCondBranchOpcode(I->getOpcode()))
     return 0;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 653f802..b73e095 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -567,8 +567,8 @@ def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
 def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
 
 // Add/subtract
-defm ADD : AddSub<0, "add", add>;
-defm SUB : AddSub<1, "sub">;
+defm ADD : AddSub<0, "add", "sub", add>;
+defm SUB : AddSub<1, "sub", "add">;
 
 def : InstAlias<"mov $dst, $src",
                 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
@@ -579,8 +579,8 @@ def : InstAlias<"mov $dst, $src",
 def : InstAlias<"mov $dst, $src",
                 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
 
-defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">;
-defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">;
+defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
+defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
 
 // Use SUBS instead of SUB to enable CSE between SUBS and SUB.
 def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e55ae99..580427a 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -187,6 +187,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
   case MachineOperand::MO_ExternalSymbol:
     MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
     break;
+  case MachineOperand::MO_MCSymbol:
+    MCOp = LowerSymbolOperand(MO, MO.getMCSymbol());
+    break;
   case MachineOperand::MO_JumpTableIndex:
     MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
     break;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index 908f66f..1e29b80 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -47,6 +47,6 @@ public:
   MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
   MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 2a0f0a4..536a8d0 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -158,6 +158,6 @@ private:
   MILOHContainer LOHContainerSet;
   SetOfInstructions LOHRelated;
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index bab8463..5394875 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -154,7 +154,7 @@ bool haveSameParity(unsigned reg1, unsigned reg2) {
   return isOdd(reg1) == isOdd(reg2);
 }
 
-} // namespace
+}
 
 bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
                                                  unsigned Ra) {
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
index c83aea4..4f656f9 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -33,6 +33,6 @@ private:
   // Add constraints between existing chains
   void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
 };
-} // namespace llvm
+}
 
 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index a993b60..11932d2 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -28,6 +28,6 @@ public:
                                   unsigned Align, bool isVolatile,
                                   MachinePointerInfo DstPtrInfo) const override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index e8165a8..1c6b157 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -57,7 +57,7 @@ private:
   }
 };
 char AArch64StorePairSuppress::ID = 0;
-} // namespace
+} // anonymous
 
 FunctionPass *llvm::createAArch64StorePairSuppressPass() {
   return new AArch64StorePairSuppress();
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index c9b54cc..6bb0694 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -151,6 +151,6 @@ public:
 
   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 5496a50..db6e244 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -225,6 +225,10 @@ void AArch64PassConfig::addIRPasses() {
 
   TargetPassConfig::addIRPasses();
 
+  // Match interleaved memory accesses to ldN/stN intrinsics.
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createInterleavedAccessPass(TM));
+
   if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
     // and lower a GEP with multiple indices to either arithmetic operations or
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ed27cf8..fc91c94 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -407,6 +407,26 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   return LT.first;
 }
 
+unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
+    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+    unsigned Alignment, unsigned AddressSpace) {
+  assert(Factor >= 2 && "Invalid interleave factor");
+  assert(isa<VectorType>(VecTy) && "Expect a vector type");
+
+  if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+    unsigned NumElts = VecTy->getVectorNumElements();
+    Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+    unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+
+    // ldN/stN only support legal vector types of size 64 or 128 in bits.
+    if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
+      return Factor;
+  }
+
+  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+                                           Alignment, AddressSpace);
+}
+
 unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
   unsigned Cost = 0;
   for (auto *I : Tys) {
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 25c22bc..4dabdad 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -139,6 +139,11 @@ public:
 
   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
 
+  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                      unsigned Factor,
+                                      ArrayRef<unsigned> Indices,
+                                      unsigned Alignment,
+                                      unsigned AddressSpace);
   /// @}
 };
 
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 063c053..38e8b4d 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -699,6 +699,25 @@ public:
     const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
     return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
   }
+  bool isAddSubImmNeg() const {
+    if (!isShiftedImm() && !isImm())
+      return false;
+
+    const MCExpr *Expr;
+
+    // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'.
+    if (isShiftedImm()) {
+      unsigned Shift = ShiftedImm.ShiftAmount;
+      Expr = ShiftedImm.Val;
+      if (Shift != 0 && Shift != 12)
+        return false;
+    } else
+      Expr = getImm();
+
+    // Otherwise it should be a real negative immediate in range:
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
+    return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff;
+  }
   bool isCondCode() const { return Kind == k_CondCode; }
   bool isSIMDImmType10() const {
     if (!isImm())
@@ -1219,6 +1238,18 @@ public:
     }
   }
 
+  void addAddSubImmNegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+
+    const MCExpr *MCE = isShiftedImm() ? getShiftedImmVal() : getImm();
+    const MCConstantExpr *CE = cast<MCConstantExpr>(MCE);
+    int64_t Val = -CE->getValue();
+    unsigned ShiftAmt = isShiftedImm() ? ShiftedImm.ShiftAmount : 0;
+
+    Inst.addOperand(MCOperand::createImm(Val));
+    Inst.addOperand(MCOperand::createImm(ShiftAmt));
+  }
+
   void addCondCodeOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createImm(getCondCode()));
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 19544ac..15dee97 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -181,6 +181,6 @@ public:
   static const char *getRegisterName(unsigned RegNo,
                                      unsigned AltIdx = AArch64::NoRegAltName);
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 3e982ee..7624c72 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -293,7 +293,7 @@ enum CompactUnwindEncodings {
   UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800
 };
 
-} // namespace CU
+} // end CU namespace
 
 // FIXME: This should be in a separate file.
 class DarwinAArch64AsmBackend : public AArch64AsmBackend {
@@ -517,7 +517,7 @@ void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
   }
   AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
 }
-} // namespace
+}
 
 MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
                                               const MCRegisterInfo &MRI,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 807679f..1f516d1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -34,7 +34,7 @@ protected:
 
 private:
 };
-} // namespace
+}
 
 AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
                                                bool IsLittleEndian)
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index bbcbf51..b5b1d1f 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -156,22 +156,12 @@ private:
   }
 
   void EmitMappingSymbol(StringRef Name) {
-    MCSymbol *Start = getContext().createTempSymbol();
-    EmitLabel(Start);
-
     auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
         Name + "." + Twine(MappingSymbolCounter++)));
-
-    getAssembler().registerSymbol(*Symbol);
+    EmitLabel(Symbol);
     Symbol->setType(ELF::STT_NOTYPE);
     Symbol->setBinding(ELF::STB_LOCAL);
     Symbol->setExternal(false);
-    auto Sec = getCurrentSection().first;
-    assert(Sec && "need a section");
-    Symbol->setSection(*Sec);
-
-    const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
-    Symbol->setVariableValue(Value);
   }
 
   int64_t MappingSymbolCounter;
@@ -213,4 +203,4 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
     return new AArch64TargetELFStreamer(S);
   return nullptr;
 }
-} // namespace llvm
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index ca56f63..3423844 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -65,7 +65,7 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
 MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
                                                     const MCSubtargetInfo &STI);
 
-} // namespace llvm
+} // End llvm namespace
 
 // Defines symbolic names for AArch64 registers.  This defines a mapping from
 // register name to register number.
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index b2f5bf3..741b273 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -38,7 +38,7 @@ public:
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue) override;
 };
-} // namespace
+}
 
 bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
     const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 40071f6..7e42f8e 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -346,7 +346,7 @@ namespace AArch64AT {
     ATMapper();
   };
 
-} // namespace AArch64AT
+}
 namespace AArch64DB {
   enum DBValues {
     Invalid = -1,
@@ -369,7 +369,7 @@ namespace AArch64DB {
 
     DBarrierMapper();
   };
-} // namespace AArch64DB
+}
 
 namespace  AArch64DC {
   enum DCValues {
@@ -390,7 +390,7 @@ namespace  AArch64DC {
     DCMapper();
   };
 
-} // namespace AArch64DC
+}
 
 namespace  AArch64IC {
   enum ICValues {
@@ -410,7 +410,7 @@ namespace  AArch64IC {
   static inline bool NeedsRegister(ICValues Val) {
     return Val == IVAU;
   }
-} // namespace AArch64IC
+}
 
 namespace  AArch64ISB {
   enum ISBValues {
@@ -422,7 +422,7 @@ namespace  AArch64ISB {
 
     ISBMapper();
   };
-} // namespace AArch64ISB
+}
 
 namespace AArch64PRFM {
   enum PRFMValues {
@@ -452,7 +452,7 @@ namespace AArch64PRFM {
 
     PRFMMapper();
   };
-} // namespace AArch64PRFM
+}
 
 namespace AArch64PState {
   enum PStateValues {
@@ -471,7 +471,7 @@ namespace AArch64PState {
     PStateMapper();
   };
 
-} // namespace AArch64PState
+}
 
 namespace AArch64SE {
     enum ShiftExtSpecifiers {
@@ -492,7 +492,7 @@ namespace AArch64SE {
         SXTW,
         SXTX
     };
-} // namespace AArch64SE
+}
 
 namespace AArch64Layout {
     enum VectorLayout {
@@ -514,7 +514,7 @@ namespace AArch64Layout {
         VL_S,
         VL_D
     };
-} // namespace AArch64Layout
+}
 
 inline static const char *
 AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) {
@@ -1221,7 +1221,7 @@ namespace AArch64SysReg {
   };
 
   uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
-} // namespace AArch64SysReg
+}
 
 namespace AArch64TLBI {
   enum TLBIValues {
@@ -1283,7 +1283,7 @@ namespace AArch64TLBI {
       return true;
     }
   }
-} // namespace AArch64TLBI 
+} 
 
 namespace AArch64II {
   /// Target Operand Flag enum.
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 2e7e39a..569ad38 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -141,6 +141,19 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
 
+class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
+                                 : SubtargetFeature <
+      "isaver"#Major#"."#Minor#"."#Stepping,
+      "IsaVersion",
+      "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
+      "Instruction set version number"
+>;
+
+def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
+def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>;
+def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>;
+def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
+
 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
         "localmemorysize"#Value,
         "LocalMemorySize",
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index afc6bcb..709d753 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,7 +17,9 @@
 //
 
 #include "AMDGPUAsmPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "InstPrinter/AMDGPUInstPrinter.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "AMDGPU.h"
 #include "AMDKernelCodeT.h"
 #include "AMDGPUSubtarget.h"
@@ -89,6 +91,15 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                    std::unique_ptr<MCStreamer> Streamer)
     : AsmPrinter(TM, std::move(Streamer)) {}
 
+void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
+  const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+  SIProgramInfo KernelInfo;
+  if (STM.isAmdHsaOS()) {
+    getSIProgramInfo(KernelInfo, *MF);
+    EmitAmdKernelCodeT(*MF, KernelInfo);
+  }
+}
+
 void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
 
   // This label is used to mark the end of the .text section.
@@ -113,13 +124,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
-  if (STM.isAmdHsaOS()) {
-    getSIProgramInfo(KernelInfo, MF);
-    EmitAmdKernelCodeT(MF, KernelInfo);
-    OutStreamer->EmitCodeAlignment(2 << (MF.getAlignment() - 1));
-  } else if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    getSIProgramInfo(KernelInfo, MF);
-    EmitProgramInfoSI(MF, KernelInfo);
+  if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    if (!STM.isAmdHsaOS()) {
+      getSIProgramInfo(KernelInfo, MF);
+      EmitProgramInfoSI(MF, KernelInfo);
+    }
+    // Emit directives
+    AMDGPUTargetStreamer *TS =
+        static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+    TS->EmitDirectiveHSACodeObjectVersion(1, 0);
+    AMDGPU::IsaVersion ISA = STM.getIsaVersion();
+    TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
+                                      "AMD", "AMDGPU");
   } else {
     EmitProgramInfoR600(MF);
   }
@@ -459,125 +475,28 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
 }
 
 void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
-                                        const SIProgramInfo &KernelInfo) const {
+                                         const SIProgramInfo &KernelInfo) const {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
   amd_kernel_code_t header;
 
-  memset(&header, 0, sizeof(header));
-
-  header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
-  header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
-
-  header.struct_byte_size = sizeof(amd_kernel_code_t);
-
-  header.target_chip = STM.getAmdKernelCodeChipID();
-
-  header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
+  AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
 
   header.compute_pgm_resource_registers =
       KernelInfo.ComputePGMRSrc1 |
       (KernelInfo.ComputePGMRSrc2 << 32);
+  header.code_properties =
+      AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
+      AMD_CODE_PROPERTY_IS_PTR64;
 
-  // Code Properties:
-  header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
-                           AMD_CODE_PROPERTY_IS_PTR64;
-
-  if (KernelInfo.FlatUsed)
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
-
-  if (KernelInfo.ScratchBlocks)
-    header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
-
-  header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
-  header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
-
-  // MFI->ABIArgOffset is the number of bytes for the kernel arguments
-  // plus 36.  36 is the number of bytes reserved at the begining of the
-  // input buffer to store work-group size information.
-  // FIXME: We should be adding the size of the implicit arguments
-  // to this value.
   header.kernarg_segment_byte_size = MFI->ABIArgOffset;
-
   header.wavefront_sgpr_count = KernelInfo.NumSGPR;
   header.workitem_vgpr_count = KernelInfo.NumVGPR;
 
-  // FIXME: What values do I put for these alignments
-  header.kernarg_segment_alignment = 0;
-  header.group_segment_alignment = 0;
-  header.private_segment_alignment = 0;
-
-  header.code_type = 1; // HSA_EXT_CODE_KERNEL
-
-  header.wavefront_size = STM.getWavefrontSize();
-
-  MCSectionELF *VersionSection =
-      OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
-  OutStreamer->SwitchSection(VersionSection);
-  OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
-                         Twine(header.hsail_version_major) + "." +
-                         Twine(header.hsail_version_minor) + ":" +
-                         "AMD:" +
-                         Twine(header.amd_code_version_major) + "." +
-                         Twine(header.amd_code_version_minor) +  ":" +
-                         "GFX8.1:0").str());
-
-  OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-
-  if (isVerbose()) {
-    OutStreamer->emitRawComment("amd_code_version_major = " +
-                                Twine(header.amd_code_version_major), false);
-    OutStreamer->emitRawComment("amd_code_version_minor = " +
-                                Twine(header.amd_code_version_minor), false);
-    OutStreamer->emitRawComment("struct_byte_size = " +
-                                Twine(header.struct_byte_size), false);
-    OutStreamer->emitRawComment("target_chip = " +
-                                Twine(header.target_chip), false);
-    OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
-                                Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
-                                false);
-    OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
-                                Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
-                                false);
-    OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
-      Twine((bool)(header.code_properties &
-                   AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
-    OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
-      Twine((bool)(header.code_properties &
-                   AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
-    OutStreamer->emitRawComment("private_element_size = 2 ", false);
-    OutStreamer->emitRawComment("is_ptr64 = " +
-        Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
-    OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
-                                Twine(header.workitem_private_segment_byte_size),
-                                false);
-    OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
-                                Twine(header.workgroup_group_segment_byte_size),
-                                false);
-    OutStreamer->emitRawComment("gds_segment_byte_size = " +
-                                Twine(header.gds_segment_byte_size), false);
-    OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
-                                Twine(header.kernarg_segment_byte_size), false);
-    OutStreamer->emitRawComment("wavefront_sgpr_count = " +
-                                Twine(header.wavefront_sgpr_count), false);
-    OutStreamer->emitRawComment("workitem_vgpr_count = " +
-                                Twine(header.workitem_vgpr_count), false);
-    OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
-    OutStreamer->emitRawComment("wavefront_size = " +
-                                Twine((int)header.wavefront_size), false);
-    OutStreamer->emitRawComment("optimization_level = " +
-                                Twine(header.optimization_level), false);
-    OutStreamer->emitRawComment("hsail_profile = " +
-                                Twine(header.hsail_profile), false);
-    OutStreamer->emitRawComment("hsail_machine_model = " +
-                                Twine(header.hsail_machine_model), false);
-    OutStreamer->emitRawComment("hsail_version_major = " +
-                                Twine(header.hsail_version_major), false);
-    OutStreamer->emitRawComment("hsail_version_minor = " +
-                                Twine(header.hsail_version_minor), false);
-  }
 
-  OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
+  AMDGPUTargetStreamer *TS =
+      static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+  TS->EmitAMDKernelCodeT(header);
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 9207251..345af9b 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -97,6 +97,8 @@ public:
   /// Implemented in AMDGPUMCInstLower.cpp
   void EmitInstruction(const MachineInstr *MI) override;
 
+  void EmitFunctionBodyStart() override;
+
   void EmitEndOfAsmFile(Module &M) override;
 
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -108,6 +110,6 @@ protected:
   size_t DisasmLineMaxLen;
 };
 
-} // namespace llvm
+} // End anonymous llvm
 
 #endif
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 570473d..d56838e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -68,7 +68,7 @@ public:
 };
 
 int DiagnosticInfoUnsupported::KindID = 0;
-} // namespace
+}
 
 
 static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 31ae9a3..86d3962 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -198,7 +198,7 @@ namespace AMDGPU {
   int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
 }  // End namespace AMDGPU
 
-} // namespace llvm
+} // End llvm namespace
 
 #define AMDGPU_FLAG_REGISTER_LOAD  (UINT64_C(1) << 63)
 #define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index e17b41a..f5e4694 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -41,5 +41,5 @@ public:
   bool IsKernel;
 };
 
-} // namespace llvm
+}
 #endif
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 605ccd0..0779d1d 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -72,6 +72,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
       WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
       EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
       GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
+      IsaVersion(ISAVersion0_0_0),
       FrameLowering(TargetFrameLowering::StackGrowsUp,
                     64 * 16, // Maximum stack alignment (long16)
                     0),
@@ -109,6 +110,10 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const {
   }
 }
 
+AMDGPU::IsaVersion AMDGPUSubtarget::getIsaVersion() const {
+  return AMDGPU::getIsaVersion(getFeatureBits());
+}
+
 bool AMDGPUSubtarget::isVGPRSpillingEnabled(
                                        const SIMachineFunctionInfo *MFI) const {
   return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
@@ -131,3 +136,4 @@ void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     Policy.OnlyBottomUp = false;
   }
 }
+
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0d40d14..30f50eb 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -20,6 +20,8 @@
 #include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600ISelLowering.h"
+#include "AMDKernelCodeT.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
@@ -48,6 +50,14 @@ public:
     FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
   };
 
+  enum {
+    ISAVersion0_0_0,
+    ISAVersion7_0_0,
+    ISAVersion7_0_1,
+    ISAVersion8_0_0,
+    ISAVersion8_0_1
+  };
+
 private:
   std::string DevName;
   bool Is64bit;
@@ -77,6 +87,7 @@ private:
   bool CIInsts;
   bool FeatureDisable;
   int LDSBankCount;
+  unsigned IsaVersion; 
 
   AMDGPUFrameLowering FrameLowering;
   std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -236,6 +247,8 @@ public:
 
   unsigned getAmdKernelCodeChipID() const;
 
+  AMDGPU::IsaVersion getIsaVersion() const;
+
   bool enableMachineScheduler() const override {
     return true;
   }
@@ -275,6 +288,13 @@ public:
   bool enableSubRegLiveness() const override {
     return true;
   }
+
+  /// \brief Returns the offset in bytes from the start of the input buffer
+  ///        of the first explicit kernel argument.
+  unsigned getExplicitKernelArgOffset() const {
+    return isAmdHsaOS() ? 0 : 36;
+  }
+
 };
 
 } // End namespace llvm
diff --git a/lib/Target/AMDGPU/AMDKernelCodeT.h b/lib/Target/AMDGPU/AMDKernelCodeT.h
index eaffb85..a9ba60c 100644
--- a/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -12,9 +12,12 @@
 #ifndef AMDKERNELCODET_H
 #define AMDKERNELCODET_H
 
+#include "llvm/MC/SubtargetFeature.h"
+
 #include <cstddef>
 #include <cstdint>
 
+#include "llvm/Support/Debug.h"
 //---------------------------------------------------------------------------//
 // AMD Kernel Code, and its dependencies                                     //
 //---------------------------------------------------------------------------//
@@ -142,7 +145,7 @@ enum amd_code_property_mask_t {
   /// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
   /// is generally DWORD.
   ///
-  /// Use values from the amd_element_byte_size_t enum.
+  /// uSE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
   AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
@@ -171,7 +174,11 @@ enum amd_code_property_mask_t {
   /// Indicate if code generated has support for debugging.
   AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
   AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
-  AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
+  AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
+
+  AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 15,
+  AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
+  AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT
 };
 
 /// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
@@ -369,7 +376,7 @@ typedef struct hsa_ext_control_directives_s {
 ///     Scratch Wave Offset must be added by the kernel code and moved to
 ///     SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
 ///
-///     The second SGPR is 32 bit byte size of a single work-item�s scratch
+///     The second SGPR is 32 bit byte size of a single work-item's scratch
 ///     memory usage. This is directly loaded from the dispatch packet Private
 ///     Segment Byte Size and rounded up to a multiple of DWORD.
 ///
@@ -385,7 +392,7 @@ typedef struct hsa_ext_control_directives_s {
 ///
 /// Private Segment Size (enable_sgpr_private_segment_size):
 ///   Number of User SGPR registers: 1. The 32 bit byte size of a single
-///   work-item�s scratch memory allocation. This is the value from the dispatch
+///   work-item's scratch memory allocation. This is the value from the dispatch
 ///   packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
 ///
 ///   \todo [Does CP need to round this to >4 byte alignment?]
@@ -433,7 +440,7 @@ typedef struct hsa_ext_control_directives_s {
 ///   present
 ///
 /// Work-Group Info (enable_sgpr_workgroup_info):
-///   Number of System SGPR registers: 1. {first_wave, 14�b0000,
+///   Number of System SGPR registers: 1. {first_wave, 14'b0000,
 ///   ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
 ///
 /// Private Segment Wave Byte Offset
@@ -499,25 +506,14 @@ typedef struct hsa_ext_control_directives_s {
 /// Alternatively scalar loads can be used if the kernarg offset is uniform, as
 /// the kernarg segment is constant for the duration of the kernel execution.
 ///
-typedef struct amd_kernel_code_s {
-  /// The AMD major version of the Code Object. Must be the value
-  /// AMD_CODE_VERSION_MAJOR.
-  amd_code_version32_t amd_code_version_major;
 
-  /// The AMD minor version of the Code Object. Minor versions must be
-  /// backward compatible. Must be the value
-  /// AMD_CODE_VERSION_MINOR.
-  amd_code_version32_t amd_code_version_minor;
-
-  /// The byte size of this struct. Must be set to
-  /// sizeof(amd_kernel_code_t). Used for backward
-  /// compatibility.
-  uint32_t struct_byte_size;
-
-  /// The target chip instruction set for which code has been
-  /// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
-  /// in sc/Interface/SCCommon.h.
-  uint32_t target_chip;
+typedef struct amd_kernel_code_s {
+  uint32_t amd_kernel_code_version_major;
+  uint32_t amd_kernel_code_version_minor;
+  uint16_t amd_machine_kind;
+  uint16_t amd_machine_version_major;
+  uint16_t amd_machine_version_minor;
+  uint16_t amd_machine_version_stepping;
 
   /// Byte offset (possibly negative) from start of amd_kernel_code_t
   /// object to kernel's entry point instruction. The actual code for
@@ -535,10 +531,6 @@ typedef struct amd_kernel_code_s {
   /// and size. The offset is from the start (possibly negative) of
   /// amd_kernel_code_t object. Set both to 0 if no prefetch
   /// information is available.
-  ///
-  /// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
-  /// not make the size a uint64_t as prefetching more than 4GiB seems
-  /// excessive.
   int64_t kernel_code_prefetch_byte_offset;
   uint64_t kernel_code_prefetch_byte_size;
 
@@ -553,11 +545,11 @@ typedef struct amd_kernel_code_s {
 
   /// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
   /// COMPUTE_PGM_RSRC2 registers.
-  amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
+  uint64_t compute_pgm_resource_registers;
 
   /// Code properties. See amd_code_property_mask_t for a full list of
   /// properties.
-  amd_code_property32_t code_properties;
+  uint32_t code_properties;
 
   /// The amount of memory required for the combined private, spill
   /// and arg segments for a work-item in bytes. If
@@ -629,76 +621,21 @@ typedef struct amd_kernel_code_s {
   /// The maximum byte alignment of variables used by the kernel in
   /// the specified memory segment. Expressed as a power of two. Must
   /// be at least HSA_POWERTWO_16.
-  hsa_powertwo8_t kernarg_segment_alignment;
-  hsa_powertwo8_t group_segment_alignment;
-  hsa_powertwo8_t private_segment_alignment;
-
-  uint8_t reserved3;
-
-  /// Type of code object.
-  hsa_ext_code_kind32_t code_type;
-
-  /// Reserved for code properties if any are defined in the future.
-  /// There are currently no code properties so this field must be 0.
-  uint32_t reserved4;
+  uint8_t kernarg_segment_alignment;
+  uint8_t group_segment_alignment;
+  uint8_t private_segment_alignment;
 
   /// Wavefront size expressed as a power of two. Must be a power of 2
   /// in range 1..64 inclusive. Used to support runtime query that
   /// obtains wavefront size, which may be used by application to
   /// allocated dynamic group memory and set the dispatch work-group
   /// size.
-  hsa_powertwo8_t wavefront_size;
-
-  /// The optimization level specified when the kernel was
-  /// finalized.
-  uint8_t optimization_level;
-
-  /// The HSAIL profile defines which features are used. This
-  /// information is from the HSAIL version directive. If this
-  /// amd_kernel_code_t is not generated from an HSAIL compilation
-  /// unit then must be 0.
-  hsa_ext_brig_profile8_t hsail_profile;
-
-  /// The HSAIL machine model gives the address sizes used by the
-  /// code. This information is from the HSAIL version directive. If
-  /// not generated from an HSAIL compilation unit then must still
-  /// indicate for what machine mode the code is generated.
-  hsa_ext_brig_machine_model8_t hsail_machine_model;
-
-  /// The HSAIL major version. This information is from the HSAIL
-  /// version directive. If this amd_kernel_code_t is not
-  /// generated from an HSAIL compilation unit then must be 0.
-  uint32_t hsail_version_major;
-
-  /// The HSAIL minor version. This information is from the HSAIL
-  /// version directive. If this amd_kernel_code_t is not
-  /// generated from an HSAIL compilation unit then must be 0.
-  uint32_t hsail_version_minor;
-
-  /// Reserved for HSAIL target options if any are defined in the
-  /// future. There are currently no target options so this field
-  /// must be 0.
-  uint16_t reserved5;
-
-  /// Reserved. Must be 0.
-  uint16_t reserved6;
-
-  /// The values should be the actually values used by the finalizer
-  /// in generating the code. This may be the union of values
-  /// specified as finalizer arguments and explicit HSAIL control
-  /// directives. If the finalizer chooses to ignore a control
-  /// directive, and not generate constrained code, then the control
-  /// directive should not be marked as enabled even though it was
-  /// present in the HSAIL or finalizer argument. The values are
-  /// intended to reflect the constraints that the code actually
-  /// requires to correctly execute, not the values that were
-  /// actually specified at finalize time.
-  hsa_ext_control_directives_t control_directive;
-
-  /// The code can immediately follow the amd_kernel_code_t, or can
-  /// come after subsequent amd_kernel_code_t structs when there are
-  /// multiple kernels in the compilation unit.
+  uint8_t wavefront_size;
 
+  int32_t call_convention;
+  uint8_t reserved3[12];
+  uint64_t runtime_loader_kernel_symbol;
+  uint64_t control_directives[16];
 } amd_kernel_code_t;
 
 #endif // AMDKERNELCODET_H
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 80081d4..2018983 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "AMDKernelCodeT.h"
 #include "SIDefines.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallString.h"
@@ -314,6 +317,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
 
   /// }
 
+private:
+  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
+  bool ParseDirectiveHSACodeObjectVersion();
+  bool ParseDirectiveHSACodeObjectISA();
+  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+  bool ParseDirectiveAMDKernelCodeT();
+
 public:
   AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
                const MCInstrInfo &MII,
@@ -329,6 +339,11 @@ public:
     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
   }
 
+  AMDGPUTargetStreamer &getTargetStreamer() {
+    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+    return static_cast<AMDGPUTargetStreamer &>(TS);
+  }
+
   unsigned getForcedEncodingSize() const {
     return ForcedEncodingSize;
   }
@@ -403,7 +418,7 @@ struct OptionalOperand {
   bool (*ConvertResult)(int64_t&);
 };
 
-} // namespace
+}
 
 static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
   if (IsVgpr) {
@@ -581,7 +596,304 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   llvm_unreachable("Implement any new match types added!");
 }
 
+bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
+                                               uint32_t &Minor) {
+  if (getLexer().isNot(AsmToken::Integer))
+    return TokError("invalid major version");
+
+  Major = getLexer().getTok().getIntVal();
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("minor version number required, comma expected");
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Integer))
+    return TokError("invalid minor version");
+
+  Minor = getLexer().getTok().getIntVal();
+  Lex();
+
+  return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
+
+  uint32_t Major;
+  uint32_t Minor;
+
+  if (ParseDirectiveMajorMinor(Major, Minor))
+    return true;
+
+  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
+  return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
+
+  uint32_t Major;
+  uint32_t Minor;
+  uint32_t Stepping;
+  StringRef VendorName;
+  StringRef ArchName;
+
+  // If this directive has no arguments, then use the ISA version for the
+  // targeted GPU.
+  if (getLexer().is(AsmToken::EndOfStatement)) {
+    AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits());
+    getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
+                                                      Isa.Stepping,
+                                                      "AMD", "AMDGPU");
+    return false;
+  }
+
+
+  if (ParseDirectiveMajorMinor(Major, Minor))
+    return true;
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("stepping version number required, comma expected");
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Integer))
+    return TokError("invalid stepping version");
+
+  Stepping = getLexer().getTok().getIntVal();
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("vendor name required, comma expected");
+  Lex();
+
+  if (getLexer().isNot(AsmToken::String))
+    return TokError("invalid vendor name");
+
+  VendorName = getLexer().getTok().getStringContents();
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("arch name required, comma expected");
+  Lex();
+
+  if (getLexer().isNot(AsmToken::String))
+    return TokError("invalid arch name");
+
+  ArchName = getLexer().getTok().getStringContents();
+  Lex();
+
+  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
+                                                    VendorName, ArchName);
+  return false;
+}
+
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
+                                               amd_kernel_code_t &Header) {
+
+  if (getLexer().isNot(AsmToken::Equal))
+    return TokError("expected '='");
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Integer))
+    return TokError("amd_kernel_code_t values must be integers");
+
+  uint64_t Value = getLexer().getTok().getIntVal();
+  Lex();
+
+  if (ID == "kernel_code_version_major")
+    Header.amd_kernel_code_version_major = Value;
+  else if (ID == "kernel_code_version_minor")
+    Header.amd_kernel_code_version_minor = Value;
+  else if (ID == "machine_kind")
+    Header.amd_machine_kind = Value;
+  else if (ID == "machine_version_major")
+    Header.amd_machine_version_major = Value;
+  else if (ID == "machine_version_minor")
+    Header.amd_machine_version_minor = Value;
+  else if (ID == "machine_version_stepping")
+    Header.amd_machine_version_stepping = Value;
+  else if (ID == "kernel_code_entry_byte_offset")
+    Header.kernel_code_entry_byte_offset = Value;
+  else if (ID == "kernel_code_prefetch_byte_size")
+    Header.kernel_code_prefetch_byte_size = Value;
+  else if (ID == "max_scratch_backing_memory_byte_size")
+    Header.max_scratch_backing_memory_byte_size = Value;
+  else if (ID == "compute_pgm_rsrc1_vgprs")
+    Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value);
+  else if (ID == "compute_pgm_rsrc1_sgprs")
+    Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value);
+  else if (ID == "compute_pgm_rsrc1_priority")
+    Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value);
+  else if (ID == "compute_pgm_rsrc1_float_mode")
+    Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value);
+  else if (ID == "compute_pgm_rsrc1_priv")
+    Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value);
+  else if (ID == "compute_pgm_rsrc1_dx10_clamp")
+    Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value);
+  else if (ID == "compute_pgm_rsrc1_debug_mode")
+    Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value);
+  else if (ID == "compute_pgm_rsrc1_ieee_mode")
+    Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value);
+  else if (ID == "compute_pgm_rsrc2_scratch_en")
+    Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_user_sgpr")
+    Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_tgid_x_en")
+    Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_tgid_y_en")
+    Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_tgid_z_en")
+    Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_tg_size_en")
+    Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt")
+    Header.compute_pgm_resource_registers |=
+        (S_00B84C_TIDIG_COMP_CNT(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_excp_en_msb")
+    Header.compute_pgm_resource_registers |=
+        (S_00B84C_EXCP_EN_MSB(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_lds_size")
+    Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32);
+  else if (ID == "compute_pgm_rsrc2_excp_en")
+    Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32);
+  else if (ID == "compute_pgm_resource_registers")
+    Header.compute_pgm_resource_registers = Value;
+  else if (ID == "enable_sgpr_private_segment_buffer")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT);
+  else if (ID == "enable_sgpr_dispatch_ptr")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT);
+  else if (ID == "enable_sgpr_queue_ptr")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT);
+  else if (ID == "enable_sgpr_kernarg_segment_ptr")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT);
+  else if (ID == "enable_sgpr_dispatch_id")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT);
+  else if (ID == "enable_sgpr_flat_scratch_init")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT);
+  else if (ID == "enable_sgpr_private_segment_size")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT);
+  else if (ID == "enable_sgpr_grid_workgroup_count_x")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT);
+  else if (ID == "enable_sgpr_grid_workgroup_count_y")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT);
+  else if (ID == "enable_sgpr_grid_workgroup_count_z")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT);
+  else if (ID == "enable_ordered_append_gds")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT);
+  else if (ID == "private_element_size")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT);
+  else if (ID == "is_ptr64")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_IS_PTR64_SHIFT);
+  else if (ID == "is_dynamic_callstack")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT);
+  else if (ID == "is_debug_enabled")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT);
+  else if (ID == "is_xnack_enabled")
+    Header.code_properties |=
+        (Value << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT);
+  else if (ID == "workitem_private_segment_byte_size")
+    Header.workitem_private_segment_byte_size = Value;
+  else if (ID == "workgroup_group_segment_byte_size")
+    Header.workgroup_group_segment_byte_size = Value;
+  else if (ID == "gds_segment_byte_size")
+    Header.gds_segment_byte_size = Value;
+  else if (ID == "kernarg_segment_byte_size")
+    Header.kernarg_segment_byte_size = Value;
+  else if (ID == "workgroup_fbarrier_count")
+    Header.workgroup_fbarrier_count = Value;
+  else if (ID == "wavefront_sgpr_count")
+    Header.wavefront_sgpr_count = Value;
+  else if (ID == "workitem_vgpr_count")
+    Header.workitem_vgpr_count = Value;
+  else if (ID == "reserved_vgpr_first")
+    Header.reserved_vgpr_first = Value;
+  else if (ID == "reserved_vgpr_count")
+    Header.reserved_vgpr_count = Value;
+  else if (ID == "reserved_sgpr_first")
+    Header.reserved_sgpr_first = Value;
+  else if (ID == "reserved_sgpr_count")
+    Header.reserved_sgpr_count = Value;
+  else if (ID == "debug_wavefront_private_segment_offset_sgpr")
+    Header.debug_wavefront_private_segment_offset_sgpr = Value;
+  else if (ID == "debug_private_segment_buffer_sgpr")
+    Header.debug_private_segment_buffer_sgpr = Value;
+  else if (ID == "kernarg_segment_alignment")
+    Header.kernarg_segment_alignment = Value;
+  else if (ID == "group_segment_alignment")
+    Header.group_segment_alignment = Value;
+  else if (ID == "private_segment_alignment")
+    Header.private_segment_alignment = Value;
+  else if (ID == "wavefront_size")
+    Header.wavefront_size = Value;
+  else if (ID == "call_convention")
+    Header.call_convention = Value;
+  else if (ID == "runtime_loader_kernel_symbol")
+    Header.runtime_loader_kernel_symbol = Value;
+  else
+    return TokError("amd_kernel_code_t value not recognized.");
+
+  return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
+
+  amd_kernel_code_t Header;
+  AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
+
+  while (true) {
+
+    if (getLexer().isNot(AsmToken::EndOfStatement))
+      return TokError("amd_kernel_code_t values must begin on a new line");
+
+    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
+    // will set the current token to EndOfStatement.
+    while(getLexer().is(AsmToken::EndOfStatement))
+      Lex();
+
+    if (getLexer().isNot(AsmToken::Identifier))
+      return TokError("expected value identifier or .end_amd_kernel_code_t");
+
+    StringRef ID = getLexer().getTok().getIdentifier();
+    Lex();
+
+    if (ID == ".end_amd_kernel_code_t")
+      break;
+
+    if (ParseAMDKernelCodeTValue(ID, Header))
+      return true;
+  }
+
+  getTargetStreamer().EmitAMDKernelCodeT(Header);
+
+  return false;
+}
+
 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getString();
+
+  if (IDVal == ".hsa_code_object_version")
+    return ParseDirectiveHSACodeObjectVersion();
+
+  if (IDVal == ".hsa_code_object_isa")
+    return ParseDirectiveHSACodeObjectISA();
+
+  if (IDVal == ".amd_kernel_code_t")
+    return ParseDirectiveAMDKernelCodeT();
+
   return true;
 }
 
diff --git a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
index 63d44d1..dab0c6f 100644
--- a/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
 type = Library
 name = AMDGPUAsmParser
 parent = AMDGPU
-required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo Support
+required_libraries = MC MCParser AMDGPUDesc AMDGPUInfo AMDGPUUtils Support
 add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 3e5ff1f..9460bf6 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -62,3 +62,4 @@ add_subdirectory(AsmParser)
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
+add_subdirectory(Utils)
diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt
index c6861df..38c5489 100644
--- a/lib/Target/AMDGPU/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo Utils
 
 [component_0]
 type = TargetGroup
@@ -29,5 +29,5 @@ has_asmprinter = 1
 type = Library
 name = AMDGPUCodeGen
 parent = AMDGPU
-required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo Scalar SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmParser AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils
 add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 8bed2de..468563c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -127,11 +127,14 @@ bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
 namespace {
 
 class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
+  bool Is64Bit;
+
 public:
-  ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
+  ELFAMDGPUAsmBackend(const Target &T, bool Is64Bit) :
+      AMDGPUAsmBackend(T), Is64Bit(Is64Bit) { }
 
   MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
-    return createAMDGPUELFObjectWriter(OS);
+    return createAMDGPUELFObjectWriter(Is64Bit, OS);
   }
 };
 
@@ -140,5 +143,8 @@ public:
 MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
                                            const MCRegisterInfo &MRI,
                                            const Triple &TT, StringRef CPU) {
-  return new ELFAMDGPUAsmBackend(T);
+  Triple TargetTriple(TT);
+
+  // Use 64-bit ELF for amdgcn
+  return new ELFAMDGPUAsmBackend(T, TargetTriple.getArch() == Triple::amdgcn);
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 59f45ff..820f17d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -18,7 +18,7 @@ namespace {
 
 class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
 public:
-  AMDGPUELFObjectWriter();
+  AMDGPUELFObjectWriter(bool Is64Bit);
 protected:
   unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                         bool IsPCRel) const override {
@@ -30,10 +30,11 @@ protected:
 
 } // End anonymous namespace
 
-AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
-  : MCELFObjectTargetWriter(false, 0, 0, false) { }
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit)
+  : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA,
+                            ELF::EM_AMDGPU, false) { }
 
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
-  MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) {
+  MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit);
   return createELFObjectWriter(MOTW, OS, true);
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
index fa3b3c3..01021d6 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
 };
-} // namespace AMDGPU
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index a7d3dd1..7172e4b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -14,6 +14,7 @@
 
 #include "AMDGPUMCTargetDesc.h"
 #include "AMDGPUMCAsmInfo.h"
+#include "AMDGPUTargetStreamer.h"
 #include "InstPrinter/AMDGPUInstPrinter.h"
 #include "SIDefines.h"
 #include "llvm/MC/MCCodeGenInfo.h"
@@ -72,6 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
   return new AMDGPUInstPrinter(MAI, MII, MRI);
 }
 
+static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S,
+                                                      formatted_raw_ostream &OS,
+                                                      MCInstPrinter *InstPrint,
+                                                      bool isVerboseAsm) {
+  return new AMDGPUTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer * createAMDGPUObjectTargetStreamer(
+                                                   MCStreamer &S,
+                                                   const MCSubtargetInfo &STI) {
+  return new AMDGPUTargetELFStreamer(S);
+}
+
 extern "C" void LLVMInitializeAMDGPUTargetMC() {
   for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
     RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
@@ -84,7 +98,15 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
     TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
   }
 
+  // R600 specific registration
   TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
                                         createR600MCCodeEmitter);
+
+  // GCN specific registration
   TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
+
+  TargetRegistry::RegisterAsmTargetStreamer(TheGCNTarget,
+                                            createAMDGPUAsmTargetStreamer);
+  TargetRegistry::RegisterObjectTargetStreamer(TheGCNTarget,
+                                              createAMDGPUObjectTargetStreamer);
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index ac611b8..5d1b86b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -46,8 +46,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
 MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                      const Triple &TT, StringRef CPU);
 
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
-} // namespace llvm
+MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
+                                            raw_pwrite_stream &OS);
+} // End llvm namespace
 
 #define GET_REGINFO_ENUM
 #include "AMDGPUGenRegisterInfo.inc"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
new file mode 100644
index 0000000..09e6cb1
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -0,0 +1,297 @@
+//===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AMDGPU specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetStreamer.h"
+#include "SIDefines.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
+    : MCTargetStreamer(S) { }
+
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetAsmStreamer
+//===----------------------------------------------------------------------===//
+
+AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
+                                                 formatted_raw_ostream &OS)
+    : AMDGPUTargetStreamer(S), OS(OS) { }
+
+void
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+                                                           uint32_t Minor) {
+  OS << "\t.hsa_code_object_version " <<
+        Twine(Major) << "," << Twine(Minor) << '\n';
+}
+
+void
+AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
+                                                       uint32_t Minor,
+                                                       uint32_t Stepping,
+                                                       StringRef VendorName,
+                                                       StringRef ArchName) {
+  OS << "\t.hsa_code_object_isa " <<
+        Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
+        ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
+
+}
+
+void
+AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+  uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
+  bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
+  bool EnableSGPRDispatchPtr = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
+  bool EnableSGPRQueuePtr = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
+  bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
+  bool EnableSGPRDispatchID = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
+  bool EnableSGPRFlatScratchInit = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
+  bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+  bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
+  bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
+  bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
+  bool EnableOrderedAppendGDS = (Header.code_properties &
+      AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
+  uint32_t PrivateElementSize = (Header.code_properties &
+      AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
+          AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
+  bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
+  bool IsDynamicCallstack = (Header.code_properties &
+      AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
+  bool IsDebugEnabled = (Header.code_properties &
+      AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
+  bool IsXNackEnabled = (Header.code_properties &
+      AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
+
+  OS << "\t.amd_kernel_code_t\n" <<
+    "\t\tkernel_code_version_major = " <<
+        Header.amd_kernel_code_version_major << '\n' <<
+    "\t\tkernel_code_version_minor = " <<
+        Header.amd_kernel_code_version_minor << '\n' <<
+    "\t\tmachine_kind = " <<
+        Header.amd_machine_kind << '\n' <<
+    "\t\tmachine_version_major = " <<
+        Header.amd_machine_version_major << '\n' <<
+    "\t\tmachine_version_minor = " <<
+        Header.amd_machine_version_minor << '\n' <<
+    "\t\tmachine_version_stepping = " <<
+        Header.amd_machine_version_stepping << '\n' <<
+    "\t\tkernel_code_entry_byte_offset = " <<
+        Header.kernel_code_entry_byte_offset << '\n' <<
+    "\t\tkernel_code_prefetch_byte_size = " <<
+        Header.kernel_code_prefetch_byte_size << '\n' <<
+    "\t\tmax_scratch_backing_memory_byte_size = " <<
+        Header.max_scratch_backing_memory_byte_size << '\n' <<
+    "\t\tcompute_pgm_rsrc1_vgprs = " <<
+        G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc1_sgprs = " <<
+        G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc1_priority = " <<
+        G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc1_float_mode = " <<
+        G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc1_priv = " <<
+        G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
+        G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc1_debug_mode = " <<
+        G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
+        G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_scratch_en = " <<
+        G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
+        G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
+        G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
+        G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
+        G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
+        G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
+        G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
+        G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_lds_size = " <<
+        G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
+    "\t\tcompute_pgm_rsrc2_excp_en = " <<
+        G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
+
+    "\t\tenable_sgpr_private_segment_buffer = " <<
+        EnableSGPRPrivateSegmentBuffer << '\n' <<
+    "\t\tenable_sgpr_dispatch_ptr = " <<
+        EnableSGPRDispatchPtr << '\n' <<
+    "\t\tenable_sgpr_queue_ptr = " <<
+        EnableSGPRQueuePtr << '\n' <<
+    "\t\tenable_sgpr_kernarg_segment_ptr = " <<
+        EnableSGPRKernargSegmentPtr << '\n' <<
+    "\t\tenable_sgpr_dispatch_id = " <<
+        EnableSGPRDispatchID << '\n' <<
+    "\t\tenable_sgpr_flat_scratch_init = " <<
+        EnableSGPRFlatScratchInit << '\n' <<
+    "\t\tenable_sgpr_private_segment_size = " <<
+        EnableSGPRPrivateSegmentSize << '\n' <<
+    "\t\tenable_sgpr_grid_workgroup_count_x = " <<
+        EnableSGPRGridWorkgroupCountX << '\n' <<
+    "\t\tenable_sgpr_grid_workgroup_count_y = " <<
+        EnableSGPRGridWorkgroupCountY << '\n' <<
+    "\t\tenable_sgpr_grid_workgroup_count_z = " <<
+        EnableSGPRGridWorkgroupCountZ << '\n' <<
+    "\t\tenable_ordered_append_gds = " <<
+        EnableOrderedAppendGDS << '\n' <<
+    "\t\tprivate_element_size = " <<
+        PrivateElementSize << '\n' <<
+    "\t\tis_ptr64 = " <<
+        IsPtr64 << '\n' <<
+    "\t\tis_dynamic_callstack = " <<
+        IsDynamicCallstack << '\n' <<
+    "\t\tis_debug_enabled = " <<
+        IsDebugEnabled << '\n' <<
+    "\t\tis_xnack_enabled = " <<
+        IsXNackEnabled << '\n' <<
+    "\t\tworkitem_private_segment_byte_size = " <<
+        Header.workitem_private_segment_byte_size << '\n' <<
+    "\t\tworkgroup_group_segment_byte_size = " <<
+        Header.workgroup_group_segment_byte_size << '\n' <<
+    "\t\tgds_segment_byte_size = " <<
+        Header.gds_segment_byte_size << '\n' <<
+    "\t\tkernarg_segment_byte_size = " <<
+        Header.kernarg_segment_byte_size << '\n' <<
+    "\t\tworkgroup_fbarrier_count = " <<
+        Header.workgroup_fbarrier_count << '\n' <<
+    "\t\twavefront_sgpr_count = " <<
+        Header.wavefront_sgpr_count << '\n' <<
+    "\t\tworkitem_vgpr_count = " <<
+        Header.workitem_vgpr_count << '\n' <<
+    "\t\treserved_vgpr_first = " <<
+        Header.reserved_vgpr_first << '\n' <<
+    "\t\treserved_vgpr_count = " <<
+        Header.reserved_vgpr_count << '\n' <<
+    "\t\treserved_sgpr_first = " <<
+        Header.reserved_sgpr_first << '\n' <<
+    "\t\treserved_sgpr_count = " <<
+        Header.reserved_sgpr_count << '\n' <<
+    "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
+        Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
+    "\t\tdebug_private_segment_buffer_sgpr = " <<
+        Header.debug_private_segment_buffer_sgpr << '\n' <<
+    "\t\tkernarg_segment_alignment = " <<
+        (uint32_t)Header.kernarg_segment_alignment << '\n' <<
+    "\t\tgroup_segment_alignment = " <<
+        (uint32_t)Header.group_segment_alignment << '\n' <<
+    "\t\tprivate_segment_alignment = " <<
+        (uint32_t)Header.private_segment_alignment << '\n' <<
+    "\t\twavefront_size = " <<
+        (uint32_t)Header.wavefront_size << '\n' <<
+    "\t\tcall_convention = " <<
+        Header.call_convention << '\n' <<
+    "\t\truntime_loader_kernel_symbol = " <<
+        Header.runtime_loader_kernel_symbol << '\n' <<
+    // TODO: control_directives
+    "\t.end_amd_kernel_code_t\n";
+
+}
+
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetELFStreamer
+//===----------------------------------------------------------------------===//
+
+AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
+    : AMDGPUTargetStreamer(S), Streamer(S) { }
+
+MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
+  return static_cast<MCELFStreamer &>(Streamer);
+}
+
+void
+AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+                                                           uint32_t Minor) {
+  MCStreamer &OS = getStreamer();
+  MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
+
+  unsigned NameSZ = 4;
+
+  OS.PushSection();
+  OS.SwitchSection(Note);
+  OS.EmitIntValue(NameSZ, 4);                            // namesz
+  OS.EmitIntValue(8, 4);                                 // descz
+  OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
+  OS.EmitBytes(StringRef("AMD", NameSZ));                // name
+  OS.EmitIntValue(Major, 4);                             // desc
+  OS.EmitIntValue(Minor, 4);
+  OS.EmitValueToAlignment(4);
+  OS.PopSection();
+}
+
+void
+AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
+                                                       uint32_t Minor,
+                                                       uint32_t Stepping,
+                                                       StringRef VendorName,
+                                                       StringRef ArchName) {
+  MCStreamer &OS = getStreamer();
+  MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
+
+  unsigned NameSZ = 4;
+  uint16_t VendorNameSize = VendorName.size() + 1;
+  uint16_t ArchNameSize = ArchName.size() + 1;
+  unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
+                    sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
+                    VendorNameSize + ArchNameSize;
+
+  OS.PushSection();
+  OS.SwitchSection(Note);
+  OS.EmitIntValue(NameSZ, 4);                            // namesz
+  OS.EmitIntValue(DescSZ, 4);                            // descsz
+  OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4);                 // type
+  OS.EmitBytes(StringRef("AMD", 4));                     // name
+  OS.EmitIntValue(VendorNameSize, 2);                    // desc
+  OS.EmitIntValue(ArchNameSize, 2);
+  OS.EmitIntValue(Major, 4);
+  OS.EmitIntValue(Minor, 4);
+  OS.EmitIntValue(Stepping, 4);
+  OS.EmitBytes(VendorName);
+  OS.EmitIntValue(0, 1); // NULL terminate VendorName
+  OS.EmitBytes(ArchName);
+  OS.EmitIntValue(0, 1); // NULL terminte ArchName
+  OS.EmitValueToAlignment(4);
+  OS.PopSection();
+}
+
+void
+AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
+
+  MCStreamer &OS = getStreamer();
+  OS.PushSection();
+  OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
+  OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
+  OS.PopSection();
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
new file mode 100644
index 0000000..d37677c
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -0,0 +1,77 @@
+//===-- AMDGPUTargetStreamer.h - AMDGPU Target Streamer --------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDKernelCodeT.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+namespace llvm {
+
+class MCELFStreamer;
+
+class AMDGPUTargetStreamer : public MCTargetStreamer {
+public:
+  AMDGPUTargetStreamer(MCStreamer &S);
+  virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+                                                 uint32_t Minor) = 0;
+
+  virtual void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+                                             uint32_t Stepping,
+                                             StringRef VendorName,
+                                             StringRef ArchName) = 0;
+
+  virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
+};
+
+class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
+  formatted_raw_ostream &OS;
+public:
+  AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+  void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+                                         uint32_t Minor) override;
+
+  void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+                                     uint32_t Stepping, StringRef VendorName,
+                                     StringRef ArchName) override;
+
+  void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+};
+
+class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+
+  enum NoteType {
+    NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
+    NT_AMDGPU_HSA_HSAIL = 2,
+    NT_AMDGPU_HSA_ISA = 3,
+    NT_AMDGPU_HSA_PRODUCER = 4,
+    NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
+    NT_AMDGPU_HSA_EXTENSION = 6,
+    NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
+    NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
+  };
+
+  MCStreamer &Streamer;
+
+public:
+  AMDGPUTargetELFStreamer(MCStreamer &S);
+
+  MCELFStreamer &getStreamer();
+
+  void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
+                                         uint32_t Minor) override;
+
+  void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
+                                     uint32_t Stepping, StringRef VendorName,
+                                     StringRef ArchName) override;
+
+  void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
+};
+
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 151d0d5..8306a05 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMAMDGPUDesc
   AMDGPUMCCodeEmitter.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUMCAsmInfo.cpp
+  AMDGPUTargetStreamer.cpp
   R600MCCodeEmitter.cpp
   SIMCCodeEmitter.cpp
   )
diff --git a/lib/Target/AMDGPU/Makefile b/lib/Target/AMDGPU/Makefile
index 2e2de50..219f34d 100644
--- a/lib/Target/AMDGPU/Makefile
+++ b/lib/Target/AMDGPU/Makefile
@@ -18,6 +18,6 @@ BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \
 		AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \
 		AMDGPUGenAsmWriter.inc AMDGPUGenAsmMatcher.inc
 
-DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc Utils
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index c0ffede..69efb8b 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -104,7 +104,7 @@ def : ProcessorModel<"hainan",   SIQuarterSpeedModel, [FeatureSouthernIslands]>;
 //===----------------------------------------------------------------------===//
 
 def : ProcessorModel<"bonaire",    SIQuarterSpeedModel,
-  [FeatureSeaIslands, FeatureLDSBankCount32]
+  [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
 >;
 
 def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
@@ -112,11 +112,12 @@ def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
 >;
 
 def : ProcessorModel<"kaveri",     SIQuarterSpeedModel,
-  [FeatureSeaIslands, FeatureLDSBankCount32]
+  [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
 >;
 
 def : ProcessorModel<"hawaii", SIFullSpeedModel,
-  [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32]
+  [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32,
+   FeatureISAVersion7_0_1]
 >;
 
 def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
@@ -127,11 +128,13 @@ def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
 //===----------------------------------------------------------------------===//
 
 def : ProcessorModel<"tonga",   SIQuarterSpeedModel,
-  [FeatureVolcanicIslands, FeatureSGPRInitBug]
+  [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
 >;
 
 def : ProcessorModel<"iceland", SIQuarterSpeedModel,
-  [FeatureVolcanicIslands, FeatureSGPRInitBug]
+  [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
 >;
 
-def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
+  [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+>;
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
index 6ff0a22..51d87eda 100644
--- a/lib/Target/AMDGPU/R600Defines.h
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -48,7 +48,7 @@ namespace R600_InstFlag {
     IS_EXPORT = (1 << 17),
     LDS_1A2D = (1 << 18)
   };
-} // namespace R600_InstFlag
+}
 
 #define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS)
 
@@ -138,7 +138,7 @@ namespace OpName {
     VEC_COUNT
  };
 
-} // namespace OpName
+}
 
 //===----------------------------------------------------------------------===//
 // Config register definitions
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index c252878..c06d3c4 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -75,6 +75,6 @@ private:
   SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
 };
 
-} // namespace llvm
+} // End namespace llvm;
 
 #endif
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 5ef883c..855fa9f 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -697,15 +697,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   // Most of the following comes from the ARM implementation of AnalyzeBranch
 
   // If the block has no terminators, it just falls into the block after it.
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
     return false;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return false;
-    --I;
-  }
+
   // AMDGPU::BRANCH* instructions are only available after isel and are not
   // handled
   if (isBranch(I->getOpcode()))
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index 9c5f76c..dee4c2b 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -298,6 +298,6 @@ int getLDSNoRetOp(uint16_t Opcode);
 
 } //End namespace AMDGPU
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index f5556c1..263561e 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -29,6 +29,6 @@ public:
   unsigned StackSize;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index a1a1b40..0c06ccc 100644
--- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -375,7 +375,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
   return false;
 }
 
-} // namespace
+}
 
 llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) {
   return new R600VectorRegMerger(tm);
diff --git a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
index 93bcf68..2fc7b02 100644
--- a/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/AMDGPU/R600TextureIntrinsicsReplacer.cpp
@@ -296,7 +296,7 @@ public:
 
 char R600TextureIntrinsicsReplacer::ID = 0;
 
-} // namespace
+}
 
 FunctionPass *llvm::createR600TextureIntrinsicsReplacer() {
   return new R600TextureIntrinsicsReplacer();
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index f1b4ba1..4c32639 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -39,7 +39,7 @@ enum {
   WQM = 1 << 20,
   VGPRSpill = 1 << 21
 };
-} // namespace SIInstrFlags
+}
 
 namespace llvm {
 namespace AMDGPU {
@@ -74,7 +74,7 @@ namespace SIInstrFlags {
     P_NORMAL = 1 << 8,     // Positive normal
     P_INFINITY = 1 << 9    // Positive infinity
   };
-} // namespace SIInstrFlags
+}
 
 namespace SISrcMods {
   enum {
@@ -100,16 +100,41 @@ namespace SIOutMods {
 #define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
 #define   S_00B028_VGPRS(x)                                           (((x) & 0x3F) << 0)
 #define   S_00B028_SGPRS(x)                                           (((x) & 0x0F) << 6)
+
 #define R_00B84C_COMPUTE_PGM_RSRC2                                      0x00B84C
 #define   S_00B84C_SCRATCH_EN(x)                                      (((x) & 0x1) << 0)
+#define   G_00B84C_SCRATCH_EN(x)                                      (((x) >> 0) & 0x1)
+#define   C_00B84C_SCRATCH_EN                                         0xFFFFFFFE
 #define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
+#define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
+#define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
 #define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
+#define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
+#define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
 #define   S_00B84C_TGID_Y_EN(x)                                       (((x) & 0x1) << 8)
+#define   G_00B84C_TGID_Y_EN(x)                                       (((x) >> 8) & 0x1)
+#define   C_00B84C_TGID_Y_EN                                          0xFFFFFEFF
 #define   S_00B84C_TGID_Z_EN(x)                                       (((x) & 0x1) << 9)
+#define   G_00B84C_TGID_Z_EN(x)                                       (((x) >> 9) & 0x1)
+#define   C_00B84C_TGID_Z_EN                                          0xFFFFFDFF
 #define   S_00B84C_TG_SIZE_EN(x)                                      (((x) & 0x1) << 10)
+#define   G_00B84C_TG_SIZE_EN(x)                                      (((x) >> 10) & 0x1)
+#define   C_00B84C_TG_SIZE_EN                                         0xFFFFFBFF
 #define   S_00B84C_TIDIG_COMP_CNT(x)                                  (((x) & 0x03) << 11)
-
+#define   G_00B84C_TIDIG_COMP_CNT(x)                                  (((x) >> 11) & 0x03)
+#define   C_00B84C_TIDIG_COMP_CNT                                     0xFFFFE7FF
+/* CIK */
+#define   S_00B84C_EXCP_EN_MSB(x)                                     (((x) & 0x03) << 13)
+#define   G_00B84C_EXCP_EN_MSB(x)                                     (((x) >> 13) & 0x03)
+#define   C_00B84C_EXCP_EN_MSB                                        0xFFFF9FFF
+/*     */
 #define   S_00B84C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 15)
+#define   G_00B84C_LDS_SIZE(x)                                        (((x) >> 15) & 0x1FF)
+#define   C_00B84C_LDS_SIZE                                           0xFF007FFF
+#define   S_00B84C_EXCP_EN(x)                                         (((x) & 0x7F) << 24)
+#define   G_00B84C_EXCP_EN(x)                                         (((x) >> 24) & 0x7F)
+#define   C_00B84C_EXCP_EN 
+
 #define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
 
 
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 12d08cf..ead1a37 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -583,7 +583,8 @@ SDValue SITargetLowering::LowerFormalArguments(
     if (VA.isMemLoc()) {
       VT = Ins[i].VT;
       EVT MemVT = Splits[i].VT;
-      const unsigned Offset = 36 + VA.getLocMemOffset();
+      const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+                              VA.getLocMemOffset();
       // The first 36 bytes of the input buffer contains information about
       // thread group and global sizes.
       SDValue Arg = LowerParameter(DAG, VT, MemVT,  DL, DAG.getRoot(),
@@ -2211,8 +2212,9 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
 
 std::pair<unsigned, const TargetRegisterClass *>
 SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
-                                               const std::string &Constraint,
+                                               const std::string &Constraint_,
                                                MVT VT) const {
+  StringRef Constraint(Constraint_);
   if (Constraint == "r") {
     switch(VT.SimpleTy) {
       default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
@@ -2232,8 +2234,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
     }
 
     if (RC) {
-      unsigned Idx = std::atoi(Constraint.substr(2).c_str());
-      if (Idx < RC->getNumRegs())
+      uint32_t Idx;
+      bool Failed = Constraint.substr(2).getAsInteger(10, Idx);
+      if (!Failed && Idx < RC->getNumRegs())
         return std::make_pair(RC->getRegister(Idx), RC);
     }
   }
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 47bc178..eb96bd0 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -440,22 +440,22 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   }
 }
 
-unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
   const unsigned Opcode = MI.getOpcode();
 
   int NewOpc;
 
   // Try to map original to commuted opcode
   NewOpc = AMDGPU::getCommuteRev(Opcode);
-  // Check if the commuted (REV) opcode exists on the target.
-  if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
-    return NewOpc;
+  if (NewOpc != -1)
+    // Check if the commuted (REV) opcode exists on the target.
+    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
 
   // Try to map commuted to original opcode
   NewOpc = AMDGPU::getCommuteOrig(Opcode);
-  // Check if the original (non-REV) opcode exists on the target.
-  if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
-    return NewOpc;
+  if (NewOpc != -1)
+    // Check if the original (non-REV) opcode exists on the target.
+    return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
 
   return Opcode;
 }
@@ -771,6 +771,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
   if (MI->getNumOperands() < 3)
     return nullptr;
 
+  int CommutedOpcode = commuteOpcode(*MI);
+  if (CommutedOpcode == -1)
+    return nullptr;
+
   int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::src0);
   assert(Src0Idx != -1 && "Should always have src0 operand");
@@ -833,7 +837,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
   }
 
   if (MI)
-    MI->setDesc(get(commuteOpcode(*MI)));
+    MI->setDesc(get(CommutedOpcode));
 
   return MI;
 }
@@ -2716,8 +2720,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
 
 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
   uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
-  if (ST.isAmdHsaOS())
+  if (ST.isAmdHsaOS()) {
     RsrcDataFormat |= (1ULL << 56);
 
+  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    // Set MTYPE = 2
+    RsrcDataFormat |= (2ULL << 59);
+  }
+
   return RsrcDataFormat;
 }
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 6fafb94..0382272 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -117,7 +117,7 @@ public:
   // register.  If there is no hardware instruction that can store to \p
   // DstRC, then AMDGPU::COPY is returned.
   unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
-  unsigned commuteOpcode(const MachineInstr &MI) const;
+  int commuteOpcode(const MachineInstr &MI) const;
 
   MachineInstr *commuteInstruction(MachineInstr *MI,
                                    bool NewMI = false) const override;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 93e4ca7..fcb58d5 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1740,7 +1740,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
        InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
        ClampMod:$clamp,
        omod:$omod),
-  " $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
+  "$dst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
   [(set P.DstVT:$dst,
             (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
                                        i1:$clamp, i32:$omod)),
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
new file mode 100644
index 0000000..b76b400
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -0,0 +1,60 @@
+//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUBaseInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AMDGPUGenSubtargetInfo.inc"
+#undef GET_SUBTARGETINFO_ENUM
+
+namespace llvm {
+namespace AMDGPU {
+
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+
+  if (Features.test(FeatureISAVersion7_0_0))
+    return {7, 0, 0};
+
+  if (Features.test(FeatureISAVersion7_0_1))
+    return {7, 0, 1};
+
+  if (Features.test(FeatureISAVersion8_0_0))
+    return {8, 0, 0};
+
+  if (Features.test(FeatureISAVersion8_0_1))
+    return {8, 0, 1};
+
+  return {0, 0, 0};
+}
+
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+                               const FeatureBitset &Features) {
+
+  IsaVersion ISA = getIsaVersion(Features);
+
+  memset(&Header, 0, sizeof(Header));
+
+  Header.amd_kernel_code_version_major = 1;
+  Header.amd_kernel_code_version_minor = 0;
+  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
+  Header.amd_machine_version_major = ISA.Major;
+  Header.amd_machine_version_minor = ISA.Minor;
+  Header.amd_machine_version_stepping = ISA.Stepping;
+  Header.kernel_code_entry_byte_offset = sizeof(Header);
+  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
+  Header.wavefront_size = 6;
+  // These alignment values are specified in powers of two, so alignment =
+  // 2^n.  The minimum alignment is 2^4 = 16.
+  Header.kernarg_segment_alignment = 4;
+  Header.group_segment_alignment = 4;
+  Header.private_segment_alignment = 4;
+}
+
+} // End namespace AMDGPU
+} // End namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
new file mode 100644
index 0000000..f57028c
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -0,0 +1,34 @@
+//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+
+#include "AMDKernelCodeT.h"
+
+namespace llvm {
+
+class FeatureBitset;
+
+namespace AMDGPU {
+
+struct IsaVersion {
+  unsigned Major;
+  unsigned Minor;
+  unsigned Stepping;
+};
+
+IsaVersion getIsaVersion(const FeatureBitset &Features);
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+                               const FeatureBitset &Features);
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AMDGPU/Utils/CMakeLists.txt b/lib/Target/AMDGPU/Utils/CMakeLists.txt
new file mode 100644
index 0000000..2c07aea
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMAMDGPUUtils
+  AMDGPUBaseInfo.cpp
+  )
diff --git a/lib/Target/AMDGPU/Utils/LLVMBuild.txt b/lib/Target/AMDGPU/Utils/LLVMBuild.txt
new file mode 100644
index 0000000..dec5360
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AMDGPU/Utils/LLVMBuild.txt ------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AMDGPUUtils
+parent = AMDGPU
+required_libraries = Support
+add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/Utils/Makefile b/lib/Target/AMDGPU/Utils/Makefile
new file mode 100644
index 0000000..1019e72
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AMDGPU/Utils/Makefile --------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAMDGPUUtils
+
+# Hack: we need to include 'main' AMDGPU target directory to grab private
+# headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index d554fe5..9550a3a 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -46,6 +46,6 @@ FunctionPass *createThumb2SizeReductionPass(
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
 
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index c7ea18a..96b4742 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -410,13 +410,13 @@ def : ProcessorModel<"cortex-r4",   CortexA8Model,
 def : ProcessorModel<"cortex-r4f",  CortexA8Model,
                                     [ProcR4,
                                      FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
-                                     FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>;
+                                     FeatureVFP3, FeatureD16]>;
 
 // FIXME: R5 has currently the same ProcessorModel as A8.
 def : ProcessorModel<"cortex-r5",   CortexA8Model,
                                     [ProcR5, HasV7Ops, FeatureDB,
                                      FeatureVFP3, FeatureDSPThumb2,
-                                     FeatureHasRAS, FeatureVFPOnlySP,
+                                     FeatureHasRAS,
                                      FeatureD16, FeatureRClass]>;
 
 // FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 4530e41..738dded 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -630,7 +630,7 @@ void ARMAsmPrinter::emitAttributes() {
     } else if (STI.hasVFP4())
       ATS.emitFPU(ARM::FK_NEON_VFPV4);
     else
-      ATS.emitFPU(ARM::FK_NEON);
+      ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON);
     // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
     if (STI.hasV8Ops())
       ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
@@ -648,7 +648,13 @@ void ARMAsmPrinter::emitAttributes() {
                   ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
                   : ARM::FK_VFPV4);
     else if (STI.hasVFP3())
-      ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
+      ATS.emitFPU(STI.hasD16()
+                  // +d16
+                  ? (STI.isFPOnlySP()
+                     ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD)
+                     : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16))
+                  // -d16
+                  : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3));
     else if (STI.hasVFP2())
       ATS.emitFPU(ARM::FK_VFPV2);
   }
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f2b7a64..b1a11d6 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -367,14 +367,10 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 
 
 unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return 0;
-    --I;
-  }
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return 0;
+
   if (!isUncondBranchOpcode(I->getOpcode()) &&
       !isCondBranchOpcode(I->getOpcode()))
     return 0;
@@ -594,7 +590,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
   // all definitions of CPSR are dead
   return true;
 }
-} // namespace llvm
+}
 
 /// GetInstSize - Return the size of the specified MachineInstr.
 ///
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 6fc0edd..b4706e3 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -493,6 +493,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                          unsigned FrameReg, int &Offset,
                          const ARMBaseInstrInfo &TII);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 2edb96a..d687568 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -281,6 +281,6 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
   return true;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index cb4eeb5..f4ec8c6 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -335,7 +335,7 @@ namespace {
     }
   };
   char ARMConstantIslands::ID = 0;
-} // namespace
+}
 
 /// verify - check BBOffsets, BBSizes, alignment of islands
 void ARMConstantIslands::verify() {
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index b429bed..36f63e2 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -44,7 +44,7 @@ namespace ARMCP {
     GOTTPOFF,
     TPOFF
   };
-} // namespace ARMCP
+}
 
 /// ARMConstantPoolValue - ARM specific constantpool value. This is used to
 /// represent PC-relative displacement between the address of the load
@@ -254,6 +254,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 963b46c..4438f50 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -69,7 +69,7 @@ namespace {
                            MachineBasicBlock::iterator &MBBI);
   };
   char ARMExpandPseudo::ID = 0;
-} // namespace
+}
 
 /// TransferImpOps - Transfer implicit operands on the pseudo instruction to
 /// the instructions created from the expansion.
@@ -129,7 +129,7 @@ namespace {
       return PseudoOpc < TE.PseudoOpc;
     }
   };
-} // namespace
+}
 
 static const NEONLdStTableEntry NEONLdStTable[] = {
 { ARM::VLD1LNq16Pseudo,     ARM::VLD1LNd16,     true, false, false, EvenDblSpc, 1, 4 ,true},
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index cead18f..4175b4a 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2898,7 +2898,7 @@ const struct FoldableLoadExtendsStruct {
   { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
   { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
 };
-} // namespace
+}
 
 /// \brief The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index 5b4a44c..0c910ab 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -92,6 +92,6 @@ inline bool isV8EligibleForIT(InstrType *Instr) {
   }
 }
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 091086d..a52e497 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -221,7 +221,7 @@ struct StackAdjustingInsts {
     }
   }
 };
-} // namespace
+}
 
 /// Emit an instruction sequence that will align the address in
 /// register Reg by zero-ing out the lower bits.  For versions of the
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 98313e6..d763d17 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -78,6 +78,6 @@ public:
                                 MachineBasicBlock::iterator MI) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 575a9d9..50afb19 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -279,7 +279,7 @@ private:
   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                         bool is64BitVector);
 };
-} // namespace
+}
 
 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 /// operand. If so Imm will receive the 32-bit value.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 94a026b..4b2105b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -83,7 +83,7 @@ namespace {
       CallOrPrologue = PC;
     }
   };
-} // namespace
+}
 
 // The APCS parameter registers.
 static const MCPhysReg GPRArgRegs[] = {
@@ -11404,6 +11404,167 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
               Addr});
 }
 
+/// \brief Lower an interleaved load into a vldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
+///        %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
+///        %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
+///
+///      Into:
+///        %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
+///        %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
+///        %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
+bool ARMTargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  VectorType *VecTy = Shuffles[0]->getType();
+  Type *EltTy = VecTy->getVectorElementType();
+
+  const DataLayout *DL = getDataLayout();
+  unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+  bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+
+  // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
+  // support i64/f64 element).
+  if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
+    return false;
+
+  // A pointer vector can not be the return type of the ldN intrinsics. Need to
+  // load integer vectors first and then convert to pointer vectors.
+  if (EltTy->isPointerTy())
+    VecTy = VectorType::get(DL->getIntPtrType(EltTy),
+                            VecTy->getVectorNumElements());
+
+  static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
+                                            Intrinsic::arm_neon_vld3,
+                                            Intrinsic::arm_neon_vld4};
+
+  Function *VldnFunc =
+      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
+
+  IRBuilder<> Builder(LI);
+  SmallVector<Value *, 2> Ops;
+
+  Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+  Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
+  Ops.push_back(Builder.getInt32(LI->getAlignment()));
+
+  CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+
+  // Replace uses of each shufflevector with the corresponding vector loaded
+  // by ldN.
+  for (unsigned i = 0; i < Shuffles.size(); i++) {
+    ShuffleVectorInst *SV = Shuffles[i];
+    unsigned Index = Indices[i];
+
+    Value *SubVec = Builder.CreateExtractValue(VldN, Index);
+
+    // Convert the integer vector to pointer vector if the element is pointer.
+    if (EltTy->isPointerTy())
+      SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+
+    SV->replaceAllUsesWith(SubVec);
+  }
+
+  return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+                                   unsigned NumElts) {
+  SmallVector<Constant *, 16> Mask;
+  for (unsigned i = 0; i < NumElts; i++)
+    Mask.push_back(Builder.getInt32(Start + i));
+
+  return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a vstN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+///        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+///                                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+///        store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
+///
+///      Into:
+///        %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+///        %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+///        %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+///        call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// vst3 instruction in CodeGen.
+bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
+                                              ShuffleVectorInst *SVI,
+                                              unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+
+  VectorType *VecTy = SVI->getType();
+  assert(VecTy->getVectorNumElements() % Factor == 0 &&
+         "Invalid interleaved store");
+
+  unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+  Type *EltTy = VecTy->getVectorElementType();
+  VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+  const DataLayout *DL = getDataLayout();
+  unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+  bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64;
+
+  // Skip illegal sub vector types and vector types of i64/f64 element (vstN
+  // doesn't support i64/f64 element).
+  if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
+    return false;
+
+  Value *Op0 = SVI->getOperand(0);
+  Value *Op1 = SVI->getOperand(1);
+  IRBuilder<> Builder(SI);
+
+  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+  // vectors to integer vectors.
+  if (EltTy->isPointerTy()) {
+    Type *IntTy = DL->getIntPtrType(EltTy);
+
+    // Convert to the corresponding integer vector.
+    Type *IntVecTy =
+        VectorType::get(IntTy, Op0->getType()->getVectorNumElements());
+    Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+    Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+    SubVecTy = VectorType::get(IntTy, NumSubElts);
+  }
+
+  static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+                                       Intrinsic::arm_neon_vst3,
+                                       Intrinsic::arm_neon_vst4};
+  Function *VstNFunc = Intrinsic::getDeclaration(
+      SI->getModule(), StoreInts[Factor - 2], SubVecTy);
+
+  SmallVector<Value *, 6> Ops;
+
+  Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+
+  // Split the shufflevector operands into sub vectors for the new vstN call.
+  for (unsigned i = 0; i < Factor; i++)
+    Ops.push_back(Builder.CreateShuffleVector(
+        Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+  Ops.push_back(Builder.getInt32(SI->getAlignment()));
+  Builder.CreateCall(VstNFunc, Ops);
+  return true;
+}
+
 enum HABaseType {
   HA_UNKNOWN = 0,
   HA_FLOAT,
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 71a47a2..74396392 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -215,7 +215,7 @@ namespace llvm {
       VST3LN_UPD,
       VST4LN_UPD
     };
-  } // namespace ARMISD
+  }
 
   /// Define some predicates that are used for node matching.
   namespace ARM {
@@ -433,6 +433,15 @@ namespace llvm {
     Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                            bool IsStore, bool IsLoad) const override;
 
+    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+
+    bool lowerInterleavedLoad(LoadInst *LI,
+                              ArrayRef<ShuffleVectorInst *> Shuffles,
+                              ArrayRef<unsigned> Indices,
+                              unsigned Factor) const override;
+    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+                               unsigned Factor) const override;
+
     bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicRMWExpansionKind
@@ -638,6 +647,6 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   }
-} // namespace llvm
+}
 
 #endif  // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 59e1535..84f95be 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -198,7 +198,7 @@ namespace {
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 char ARMCGBR::ID = 0;
 FunctionPass*
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 9e5700a..90f34ea 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -43,6 +43,6 @@ private:
                             Reloc::Model RM) const override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 50e2292..245c9e8 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -142,7 +142,7 @@ namespace {
     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
   };
   char ARMLoadStoreOpt::ID = 0;
-} // namespace
+}
 
 static bool definesCPSR(const MachineInstr *MI) {
   for (const auto &MO : MI->operands()) {
@@ -444,7 +444,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
       return;
     }
 
-    if (MBBI->killsRegister(Base))
+    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
       // Register got killed. Stop updating.
       return;
   }
@@ -743,6 +743,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
     }
   }
 
+  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+    MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
+    if (TransferOp.isUse() && TransferOp.getReg() == Base)
+      BaseKill = false;
+  }
+
   SmallVector<std::pair<unsigned, bool>, 8> Regs;
   SmallVector<unsigned, 8> ImpDefs;
   SmallVector<MachineOperand *, 8> UsesOfImpDefs;
@@ -1464,119 +1470,124 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator &MBBI) {
   MachineInstr *MI = &*MBBI;
   unsigned Opcode = MI->getOpcode();
-  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
-    const MachineOperand &BaseOp = MI->getOperand(2);
-    unsigned BaseReg = BaseOp.getReg();
-    unsigned EvenReg = MI->getOperand(0).getReg();
-    unsigned OddReg  = MI->getOperand(1).getReg();
-    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
-    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
-    // ARM errata 602117: LDRD with base in list may result in incorrect base
-    // register when interrupted or faulted.
-    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
-    if (!Errata602117 &&
-        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
-      return false;
+  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
+    return false;
 
-    MachineBasicBlock::iterator NewBBI = MBBI;
-    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
-    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
-    bool EvenDeadKill = isLd ?
-      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
-    bool EvenUndef = MI->getOperand(0).isUndef();
-    bool OddDeadKill  = isLd ?
-      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
-    bool OddUndef = MI->getOperand(1).isUndef();
-    bool BaseKill = BaseOp.isKill();
-    bool BaseUndef = BaseOp.isUndef();
-    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
-    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
-    int OffImm = getMemoryOpOffset(MI);
-    unsigned PredReg = 0;
-    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
-
-    if (OddRegNum > EvenRegNum && OffImm == 0) {
-      // Ascending register numbers and no offset. It's safe to change it to a
-      // ldm or stm.
-      unsigned NewOpc = (isLd)
-        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
-        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
-      if (isLd) {
-        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
-          .addReg(BaseReg, getKillRegState(BaseKill))
-          .addImm(Pred).addReg(PredReg)
-          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
-          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
-        ++NumLDRD2LDM;
-      } else {
-        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
-          .addReg(BaseReg, getKillRegState(BaseKill))
-          .addImm(Pred).addReg(PredReg)
-          .addReg(EvenReg,
-                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
-          .addReg(OddReg,
-                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
-        ++NumSTRD2STM;
-      }
+  const MachineOperand &BaseOp = MI->getOperand(2);
+  unsigned BaseReg = BaseOp.getReg();
+  unsigned EvenReg = MI->getOperand(0).getReg();
+  unsigned OddReg  = MI->getOperand(1).getReg();
+  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+  unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
+
+  // ARM errata 602117: LDRD with base in list may result in incorrect base
+  // register when interrupted or faulted.
+  bool Errata602117 = EvenReg == BaseReg &&
+    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
+  // ARM LDRD/STRD needs consecutive registers.
+  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
+    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
+
+  if (!Errata602117 && !NonConsecutiveRegs)
+    return false;
+
+  MachineBasicBlock::iterator NewBBI = MBBI;
+  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
+  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
+  bool EvenDeadKill = isLd ?
+    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+  bool EvenUndef = MI->getOperand(0).isUndef();
+  bool OddDeadKill  = isLd ?
+    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+  bool OddUndef = MI->getOperand(1).isUndef();
+  bool BaseKill = BaseOp.isKill();
+  bool BaseUndef = BaseOp.isUndef();
+  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
+  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
+  int OffImm = getMemoryOpOffset(MI);
+  unsigned PredReg = 0;
+  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+  if (OddRegNum > EvenRegNum && OffImm == 0) {
+    // Ascending register numbers and no offset. It's safe to change it to a
+    // ldm or stm.
+    unsigned NewOpc = (isLd)
+      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
+    if (isLd) {
+      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+        .addReg(BaseReg, getKillRegState(BaseKill))
+        .addImm(Pred).addReg(PredReg)
+        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+      ++NumLDRD2LDM;
+    } else {
+      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+        .addReg(BaseReg, getKillRegState(BaseKill))
+        .addImm(Pred).addReg(PredReg)
+        .addReg(EvenReg,
+                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
+        .addReg(OddReg,
+                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
+      ++NumSTRD2STM;
+    }
+    NewBBI = std::prev(MBBI);
+  } else {
+    // Split into two instructions.
+    unsigned NewOpc = (isLd)
+      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+    // so adjust and use t2LDRi12 here for that.
+    unsigned NewOpc2 = (isLd)
+      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+    DebugLoc dl = MBBI->getDebugLoc();
+    // If this is a load and base register is killed, it may have been
+    // re-defed by the load, make sure the first load does not clobber it.
+    if (isLd &&
+        (BaseKill || OffKill) &&
+        (TRI->regsOverlap(EvenReg, BaseReg))) {
+      assert(!TRI->regsOverlap(OddReg, BaseReg));
+      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+                    OddReg, OddDeadKill, false,
+                    BaseReg, false, BaseUndef, false, OffUndef,
+                    Pred, PredReg, TII, isT2);
       NewBBI = std::prev(MBBI);
+      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+                    EvenReg, EvenDeadKill, false,
+                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+                    Pred, PredReg, TII, isT2);
     } else {
-      // Split into two instructions.
-      unsigned NewOpc = (isLd)
-        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
-        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
-      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
-      // so adjust and use t2LDRi12 here for that.
-      unsigned NewOpc2 = (isLd)
-        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
-        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
-      DebugLoc dl = MBBI->getDebugLoc();
-      // If this is a load and base register is killed, it may have been
-      // re-defed by the load, make sure the first load does not clobber it.
-      if (isLd &&
-          (BaseKill || OffKill) &&
-          (TRI->regsOverlap(EvenReg, BaseReg))) {
-        assert(!TRI->regsOverlap(OddReg, BaseReg));
-        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
-                      OddReg, OddDeadKill, false,
-                      BaseReg, false, BaseUndef, false, OffUndef,
-                      Pred, PredReg, TII, isT2);
-        NewBBI = std::prev(MBBI);
-        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
-                      EvenReg, EvenDeadKill, false,
-                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
-                      Pred, PredReg, TII, isT2);
-      } else {
-        if (OddReg == EvenReg && EvenDeadKill) {
-          // If the two source operands are the same, the kill marker is
-          // probably on the first one. e.g.
-          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
-          EvenDeadKill = false;
-          OddDeadKill = true;
-        }
-        // Never kill the base register in the first instruction.
-        if (EvenReg == BaseReg)
-          EvenDeadKill = false;
-        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
-                      EvenReg, EvenDeadKill, EvenUndef,
-                      BaseReg, false, BaseUndef, false, OffUndef,
-                      Pred, PredReg, TII, isT2);
-        NewBBI = std::prev(MBBI);
-        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
-                      OddReg, OddDeadKill, OddUndef,
-                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
-                      Pred, PredReg, TII, isT2);
+      if (OddReg == EvenReg && EvenDeadKill) {
+        // If the two source operands are the same, the kill marker is
+        // probably on the first one. e.g.
+        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+        EvenDeadKill = false;
+        OddDeadKill = true;
       }
-      if (isLd)
-        ++NumLDRD2LDR;
-      else
-        ++NumSTRD2STR;
+      // Never kill the base register in the first instruction.
+      if (EvenReg == BaseReg)
+        EvenDeadKill = false;
+      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+                    EvenReg, EvenDeadKill, EvenUndef,
+                    BaseReg, false, BaseUndef, false, OffUndef,
+                    Pred, PredReg, TII, isT2);
+      NewBBI = std::prev(MBBI);
+      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
+                    OddReg, OddDeadKill, OddUndef,
+                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
+                    Pred, PredReg, TII, isT2);
     }
-
-    MBB.erase(MI);
-    MBBI = NewBBI;
-    return true;
+    if (isLd)
+      ++NumLDRD2LDR;
+    else
+      ++NumSTRD2STR;
   }
-  return false;
+
+  MBB.erase(MI);
+  MBBI = NewBBI;
+  return true;
 }
 
 /// An optimization pass to turn multiple LDR / STR ops of the same base and
@@ -1859,7 +1870,7 @@ namespace {
     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
   };
   char ARMPreAllocLoadStoreOpt::ID = 0;
-} // namespace
+}
 
 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   TD = Fn.getTarget().getDataLayout();
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 8b12102..14dd9ef 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -229,6 +229,6 @@ public:
     return It;
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 1c8e1f8..30baf42 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -32,7 +32,7 @@ public:
   }
 };
 char ARMOptimizeBarriersPass::ID = 0;
-} // namespace
+}
 
 // Returns whether the instruction can safely move past a DMB instruction
 // The current implementation allows this iif MI does not have any possible
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 4563caa..1db190f 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -70,6 +70,6 @@ public:
                                  RTLIB::Libcall LC) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f00594f..9909a6a 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -453,6 +453,6 @@ public:
   /// True if fast-isel is used.
   bool useFastISel() const;
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif  // ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 104a34f..6e81bd2 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -332,6 +332,10 @@ void ARMPassConfig::addIRPasses() {
     }));
 
   TargetPassConfig::addIRPasses();
+
+  // Match interleaved memory accesses to ldN/stN intrinsics.
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createInterleavedAccessPass(TM));
 }
 
 bool ARMPassConfig::addPreISel() {
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 4e1b371..f4901fc 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -478,3 +478,28 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   }
   return LT.first;
 }
+
+unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                                unsigned Factor,
+                                                ArrayRef<unsigned> Indices,
+                                                unsigned Alignment,
+                                                unsigned AddressSpace) {
+  assert(Factor >= 2 && "Invalid interleave factor");
+  assert(isa<VectorType>(VecTy) && "Expect a vector type");
+
+  // vldN/vstN doesn't support vector types of i64/f64 element.
+  bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+
+  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
+    unsigned NumElts = VecTy->getVectorNumElements();
+    Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+    unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy);
+
+    // vldN/vstN only support legal vector types of size 64 or 128 in bits.
+    if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
+      return Factor;
+  }
+
+  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+                                           Alignment, AddressSpace);
+}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 9479d76..f2e5db6 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -126,6 +126,11 @@ public:
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace);
 
+  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                      unsigned Factor,
+                                      ArrayRef<unsigned> Indices,
+                                      unsigned Alignment,
+                                      unsigned AddressSpace);
   /// @}
 };
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 35387d3..c2db746 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -28,6 +28,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserUtils.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
@@ -9887,22 +9888,13 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
   }
   Lex();
 
+  MCSymbol *Sym;
   const MCExpr *Value;
-  if (Parser.parseExpression(Value)) {
-    TokError("missing expression");
-    Parser.eatToEndOfStatement();
-    return false;
-  }
-
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    TokError("unexpected token");
-    Parser.eatToEndOfStatement();
-    return false;
-  }
-  Lex();
+  if (MCParserUtils::parseAssignmentExpression(Name, /* allow_redef */ true,
+                                               Parser, Sym, Value))
+    return true;
 
-  MCSymbol *Alias = getContext().getOrCreateSymbol(Name);
-  getTargetStreamer().emitThumbSet(Alias, Value);
+  getTargetStreamer().emitThumbSet(Sym, Value);
   return false;
 }
 
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f973a8d..097ec04 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -81,7 +81,7 @@ namespace {
     private:
       std::vector<unsigned char> ITStates;
   };
-} // namespace
+}
 
 namespace {
 /// ARM disassembler for all ARM platforms.
@@ -118,7 +118,7 @@ private:
   DecodeStatus AddThumbPredicate(MCInst&) const;
   void UpdateThumbVFPPredicate(MCInst&) const;
 };
-} // namespace
+}
 
 static bool Check(DecodeStatus &Out, DecodeStatus In) {
   switch (In) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index e28f6e0..a6206e3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -29,6 +29,6 @@ public:
                                      Subtype);
   }
 };
-} // namespace
+}
 
 #endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 412feb8..68b12ed 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -23,6 +23,6 @@ public:
     return createARMELFObjectWriter(OS, OSABI, isLittle());
   }
 };
-} // namespace
+}
 
 #endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 1975bca..4289a73 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -114,7 +114,7 @@ namespace ARM_PROC {
     case ID: return "id";
     }
   }
-} // namespace ARM_PROC
+}
 
 namespace ARM_MB {
   // The Memory Barrier Option constants map directly to the 4-bit encoding of
@@ -459,6 +459,6 @@ namespace ARMII {
 
 } // end namespace ARMII
 
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 9fe27fb..804d353 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -40,7 +40,7 @@ namespace {
     bool needsRelocateWithSymbol(const MCSymbol &Sym,
                                  unsigned Type) const override;
   };
-} // namespace
+}
 
 ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
   : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index bbc0b37..4d12bfb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -563,20 +563,13 @@ private:
   }
 
   void EmitMappingSymbol(StringRef Name) {
-    MCSymbol *Start = getContext().createTempSymbol();
-    EmitLabel(Start);
-
     auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
         Name + "." + Twine(MappingSymbolCounter++)));
+    EmitLabel(Symbol);
 
-    getAssembler().registerSymbol(*Symbol);
     Symbol->setType(ELF::STT_NOTYPE);
     Symbol->setBinding(ELF::STB_LOCAL);
     Symbol->setExternal(false);
-    AssignSection(Symbol, getCurrentSection().first);
-
-    const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
-    Symbol->setVariableValue(Value);
   }
 
   void EmitThumbFunc(MCSymbol *Func) override {
@@ -804,12 +797,44 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_VFPV3_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3A,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_VFPV3_D16:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv3B,
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_VFPV3_D16_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
+  case ARM::FK_VFPV3XD:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    break;
+  case ARM::FK_VFPV3XD_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3B,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_VFPV4:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv4A,
@@ -849,6 +874,18 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
                      /* OverwriteExisting= */ false);
     break;
 
+  case ARM::FK_NEON_FP16:
+    setAttributeItem(ARMBuildAttrs::FP_arch,
+                     ARMBuildAttrs::AllowFPv3A,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+                     ARMBuildAttrs::AllowNeon,
+                     /* OverwriteExisting= */ false);
+    setAttributeItem(ARMBuildAttrs::FP_HP_extension,
+                     ARMBuildAttrs::AllowHPFP,
+                     /* OverwriteExisting= */ false);
+    break;
+
   case ARM::FK_NEON_VFPV4:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv4A,
@@ -1345,6 +1382,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
     return S;
   }
 
-} // namespace llvm
+}
 
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 23ef501..46ba571 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -104,7 +104,7 @@ enum Fixups {
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
 };
-} // namespace ARM
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 0fb395e..fafe25a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -370,7 +370,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
   return new ARMMCInstrAnalysis(Info);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c6f2d13..fd30623 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -103,7 +103,7 @@ MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
 
 /// Construct ARM Mach-O relocation info.
 MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
-} // namespace llvm
+} // End llvm namespace
 
 // Defines symbolic names for ARM registers.  This defines a mapping from
 // register name to register number.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 6ac778e..95d7ea7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -56,7 +56,7 @@ public:
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue) override;
 };
-} // namespace
+}
 
 static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
                               unsigned &Log2Size) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 32481e2..173cc93 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -60,7 +60,7 @@ namespace {
         EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
     }
   };
-} // namespace
+}
 
 void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
   if (RegSave == 0u)
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 34b552f..166c04b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -79,7 +79,7 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
 bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
   return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
 }
-} // namespace
+}
 
 namespace llvm {
 MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 6515a65..b993b1b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -35,7 +35,7 @@ void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
 void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
   getAssembler().setIsThumbFunc(Symbol);
 }
-} // namespace
+}
 
 MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
                                            MCAsmBackend &MAB,
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index ca98f69..ed2deea 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -71,7 +71,7 @@ namespace {
     bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
   };
   char MLxExpansion::ID = 0;
-} // namespace
+}
 
 void MLxExpansion::clearStack() {
   std::fill(LastMIs, LastMIs + 4, nullptr);
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index e5e89fa..31d5732 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -47,6 +47,6 @@ public:
                                 MachineBasicBlock::iterator MI) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 31b4df2..f3f493d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -58,6 +58,6 @@ private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI,
                             Reloc::Model RM) const override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 7ce602d..68736bc 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -48,7 +48,7 @@ namespace {
     bool InsertITInstructions(MachineBasicBlock &MBB);
   };
   char Thumb2ITBlockPass::ID = 0;
-} // namespace
+}
 
 /// TrackDefUses - Tracking what registers are being defined and used by
 /// instructions in the IT block. This also tracks "dependencies", i.e. uses
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index d186dfb..916ab06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -73,6 +73,6 @@ private:
 ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
 
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0dd1b4c..d9ab824 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -202,7 +202,7 @@ namespace {
     std::function<bool(const Function &)> PredicateFtor;
   };
   char Thumb2SizeReduce::ID = 0;
-} // namespace
+}
 
 Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
     : MachineFunctionPass(ID), PredicateFtor(Ftor) {
diff --git a/lib/Target/ARM/ThumbRegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index e55f88f..23aaff3 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -60,6 +60,6 @@ public:
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index 9d0aa7a9..10ec658 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -44,7 +44,7 @@ public:
                     const char *Modifier = nullptr);
   void EmitInstruction(const MachineInstr *MI) override;
 };
-} // namespace
+}
 
 void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
                                  raw_ostream &O, const char *Modifier) {
diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h
index a6fe7c9..3b9fc44 100644
--- a/lib/Target/BPF/BPFFrameLowering.h
+++ b/lib/Target/BPF/BPFFrameLowering.h
@@ -37,5 +37,5 @@ public:
     MBB.erase(MI);
   }
 };
-} // namespace llvm
+}
 #endif
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index b49de3a..d9e654c 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -51,7 +51,7 @@ private:
   // Complex Pattern for address selection.
   bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset);
 };
-} // namespace
+}
 
 // ComplexPattern used on BPF Load/Store instructions
 bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index 21d160d..38c56bb 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -86,7 +86,7 @@ public:
 };
 
 int DiagnosticInfoUnsupported::KindID = 0;
-} // namespace
+}
 
 BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                      const BPFSubtarget &STI)
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index b56bb39..ec71dca 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -85,6 +85,6 @@ private:
     return true;
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/BPFInstrInfo.h b/lib/Target/BPF/BPFInstrInfo.h
index bd96f76..ac60188 100644
--- a/lib/Target/BPF/BPFInstrInfo.h
+++ b/lib/Target/BPF/BPFInstrInfo.h
@@ -54,6 +54,6 @@ public:
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         DebugLoc DL) const override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/BPFMCInstLower.h b/lib/Target/BPF/BPFMCInstLower.h
index ba91897..054e894 100644
--- a/lib/Target/BPF/BPFMCInstLower.h
+++ b/lib/Target/BPF/BPFMCInstLower.h
@@ -38,6 +38,6 @@ public:
 
   MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h
index 44977a2..7072dd0 100644
--- a/lib/Target/BPF/BPFRegisterInfo.h
+++ b/lib/Target/BPF/BPFRegisterInfo.h
@@ -35,6 +35,6 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
 
   unsigned getFrameRegister(const MachineFunction &MF) const override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/BPFSubtarget.h b/lib/Target/BPF/BPFSubtarget.h
index 701ac57..5ad58db 100644
--- a/lib/Target/BPF/BPFSubtarget.h
+++ b/lib/Target/BPF/BPFSubtarget.h
@@ -59,6 +59,6 @@ public:
     return &InstrInfo.getRegisterInfo();
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 5a888a9..06cba22 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -60,7 +60,7 @@ public:
 
   bool addInstSelector() override;
 };
-} // namespace
+}
 
 TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new BPFPassConfig(this, PM);
diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h
index c715fd5..a0086df 100644
--- a/lib/Target/BPF/BPFTargetMachine.h
+++ b/lib/Target/BPF/BPFTargetMachine.h
@@ -38,6 +38,6 @@ public:
     return TLOF.get();
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
index cb07471..adcaff6 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -37,6 +37,6 @@ public:
   void printInstruction(const MCInst *MI, raw_ostream &O);
   static const char *getRegisterName(unsigned RegNo);
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 33aecb7..36f9926 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -84,7 +84,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
 MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
   return createBPFELFObjectWriter(OS, 0, IsLittleEndian);
 }
-} // namespace
+}
 
 MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
                                         const MCRegisterInfo &MRI,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index ef4f05f..05ba618 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -25,7 +25,7 @@ protected:
   unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                         bool IsPCRel) const override;
 };
-} // namespace
+}
 
 BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI)
     : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_NONE,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 2237654..d63bbf4 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -36,6 +36,6 @@ public:
     HasDotTypeDotSizeDirective = false;
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index b579afd..dc4ede3 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -58,7 +58,7 @@ public:
                          SmallVectorImpl<MCFixup> &Fixups,
                          const MCSubtargetInfo &STI) const override;
 };
-} // namespace
+}
 
 MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
                                             const MCRegisterInfo &MRI,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 3d2583a..e2ae652 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -49,7 +49,7 @@ MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
 
 MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS,
                                          uint8_t OSABI, bool IsLittleEndian);
-} // namespace llvm
+}
 
 // Defines symbolic names for BPF registers.  This defines a mapping from
 // register name to register number.
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 9c9c097..bc5d7f6 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1678,9 +1678,8 @@ void CppWriter::printFunctionUses(const Function* F) {
                 consts.insert(GVar->getInitializer());
         } else if (Constant* C = dyn_cast<Constant>(operand)) {
           consts.insert(C);
-          for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+          for (Value* operand : C->operands()) {
             // If the operand references a GVal or Constant, make a note of it
-            Value* operand = C->getOperand(j);
             printType(operand->getType());
             if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
               gvs.insert(GV);
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 0cd20da..ebf0635 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -37,7 +37,7 @@ public:
 
 extern Target TheCppBackendTarget;
 
-} // namespace llvm
+} // End llvm namespace
 
 
 #endif
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 837838a..9cc1e94 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -53,7 +53,7 @@ public:
                               raw_ostream &VStream,
                               raw_ostream &CStream) const override;
 };
-} // namespace
+}
 
 static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
                                                uint64_t Address,
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index b24d24a..d360be2 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -58,6 +58,6 @@ namespace llvm {
 
   /// \brief Creates a Hexagon-specific Target Transformation Info pass.
   ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index f09a5b9..792fc8b 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -53,6 +53,6 @@ namespace llvm {
     static const char *getRegisterName(unsigned RegNo);
   };
 
-} // namespace llvm
+} // end of llvm namespace
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index ff1a4fe..3753b745 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -228,7 +228,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
   }
   return true;
 }
-} // namespace
+}
 
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 33766df..37ed173 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -173,7 +173,7 @@ namespace {
     bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
     bool coalesceSegments(MachineFunction &MF);
   };
-} // namespace
+}
 
 char HexagonExpandCondsets::ID = 0;
 
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 1657d88..e4c8d8f 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -333,7 +333,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
   return true;
 }
 
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 //                         Public Constructor Functions
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index 3ea77cd..d0c7f9c 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -67,7 +67,7 @@ namespace {
   };
 
   char HexagonFixupHwLoops::ID = 0;
-} // namespace
+}
 
 INITIALIZE_PASS(HexagonFixupHwLoops, "hwloopsfixup",
                 "Hexagon Hardware Loops Fixup", false, false)
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 9797134..868f87e 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -238,7 +238,7 @@ namespace {
         return true;
     return false;
   }
-} // namespace
+}
 
 
 /// Implements shrink-wrapping of the stack frame. By default, stack frame
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 767e13c..89500cb 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -99,6 +99,6 @@ private:
   bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1a14c88..6e9e69f 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -95,7 +95,7 @@ public:
 
   unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
 };
-} // namespace
+}
 
 // Implement calling convention for Hexagon.
 static bool
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index b9d18df..b80e847 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -86,7 +86,7 @@ bool isPositiveHalfWord(SDNode *N);
 
       OP_END
     };
-  } // namespace HexagonISD
+  }
 
   class HexagonSubtarget;
 
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 91f508e..d0b8a46 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -229,6 +229,6 @@ public:
 
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 5681ae2..7672358 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -80,6 +80,6 @@ public:
   void setStackAlignBaseVReg(unsigned R) { StackAlignBaseReg = R; }
   unsigned getStackAlignBaseVReg() const { return StackAlignBaseReg; }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index fae16e2..6034344 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -238,7 +238,7 @@ protected:
 #endif
 };
 
-} // namespace llvm
+} // namespace
 
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 94ec2e7..93dcbe2 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -104,7 +104,7 @@ namespace {
   private:
     void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
   };
-} // namespace
+}
 
 char HexagonPeephole::ID = 0;
 
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index d586c39..7069ad3 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -48,7 +48,7 @@ namespace {
       FunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 char HexagonRemoveExtendArgs::ID = 0;
 
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index c72051c..8ac2e43 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -32,6 +32,6 @@ public:
                                   MachinePointerInfo SrcPtrInfo) const override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 61bb7c5..d3eb56f 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -156,7 +156,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
   return true;
 }
 
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 //                         Public Constructor Functions
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 90f1ced..a173a80 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -77,7 +77,7 @@ namespace llvm {
   FunctionPass *createHexagonCopyToCombine();
   FunctionPass *createHexagonPacketizer();
   FunctionPass *createHexagonNewValueJump();
-} // namespace llvm
+} // end namespace llvm;
 
 /// HexagonTargetMachine ctor - Create an ILP32 architecture model.
 ///
diff --git a/lib/Target/Hexagon/HexagonTargetStreamer.h b/lib/Target/Hexagon/HexagonTargetStreamer.h
index 2b4a3ad..e19c404 100644
--- a/lib/Target/Hexagon/HexagonTargetStreamer.h
+++ b/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -26,6 +26,6 @@ public:
                                            unsigned ByteAlign,
                                            unsigned AccessGranularity){};
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 66fdd65..b91a3f6 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -170,7 +170,7 @@ namespace {
     void reserveResourcesForConstExt(MachineInstr* MI);
     bool isNewValueInst(MachineInstr* MI);
   };
-} // namespace
+}
 
 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
                       false, false)
@@ -272,9 +272,8 @@ static bool IsIndirectCall(MachineInstr* MI) {
 // reservation fail.
 void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
   const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  MachineFunction *MF = MI->getParent()->getParent();
-  MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
-                                                  MI->getDebugLoc());
+  MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
+                                                 MI->getDebugLoc());
 
   if (ResourceTracker->canReserveResources(PseudoMI)) {
     ResourceTracker->reserveResources(PseudoMI);
@@ -290,11 +289,10 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
   const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
   assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
          "Should only be called for constant extended instructions");
-  MachineFunction *MF = MI->getParent()->getParent();
-  MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
-                                                  MI->getDebugLoc());
+  MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
+                                                 MI->getDebugLoc());
   bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
-  MF->DeleteMachineInstr(PseudoMI);
+  MF.DeleteMachineInstr(PseudoMI);
   return CanReserve;
 }
 
@@ -302,9 +300,8 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
 // true, otherwise, return false.
 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
   const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  MachineFunction *MF = MI->getParent()->getParent();
-  MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::A4_ext),
-                                                  MI->getDebugLoc());
+  MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
+                                                 MI->getDebugLoc());
 
   if (ResourceTracker->canReserveResources(PseudoMI)) {
     ResourceTracker->reserveResources(PseudoMI);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 0f7cf0e..da5d4d1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -31,7 +31,7 @@ public:
   unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
                         bool IsPCRel) const override;
 };
-} // namespace
+}
 
 HexagonELFObjectWriter::HexagonELFObjectWriter(uint8_t OSABI, StringRef C)
     : MCELFObjectTargetWriter(/*Is64bit*/ false, OSABI, ELF::EM_HEXAGON,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 6f8cb90..9fc4e2a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -370,7 +370,7 @@ namespace {
       return false;
     }
   }
-} // namespace
+}
 
 unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
                                               const MCOperand &MO,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 0d1f1e6..886f8db 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -174,7 +174,7 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
 
   return HexagonII::HCG_None;
 }
-} // namespace
+}
 
 /// getCompoundOp - Return the index from 0-7 into the above opcode lists.
 namespace {
@@ -199,7 +199,7 @@ unsigned getCompoundOp(MCInst const &HMCI) {
     return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
   }
 }
-} // namespace
+}
 
 namespace {
 MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
@@ -331,7 +331,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
 
   return CompoundInsn;
 }
-} // namespace
+}
 
 /// Non-Symmetrical. See if these two instructions are fit for compound pair.
 namespace {
@@ -348,7 +348,7 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
   return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
           (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
 }
-} // namespace
+}
 
 namespace {
 bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
@@ -396,7 +396,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
   }
   return false;
 }
-} // namespace
+}
 
 /// tryCompound - Given a bundle check for compound insns when one
 /// is found update the contents fo the bundle with the compound insn.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index e69a52d..48b15f8 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -461,4 +461,4 @@ void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
   MCOperand &Operand = MCI.getOperand(0);
   Operand.setImm(Operand.getImm() | outerLoopMask);
 }
-} // namespace llvm
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 9f7562a..32d61a4 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -229,7 +229,7 @@ bool subInstWouldBeExtended(MCInst const &potentialDuplex);
 
 // Attempt to find and replace compound pairs
 void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
-} // namespace HexagonMCInstrInfo
-} // namespace llvm
+}
+}
 
 #endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index 9c0e3f2..a21cce1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -60,6 +60,6 @@ bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
 unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
                           MCContext &Context, MCInst &,
                           SmallVector<DuplexCandidate, 8>);
-} // namespace llvm
+}
 
 #endif // HEXAGONMCSHUFFLER_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 4a4f0c2..83ce0ab 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -102,7 +102,7 @@ public:
       OS << "\n\t}" << PacketBundle.second;
   }
 };
-} // namespace
+}
 
 namespace {
 class HexagonTargetELFStreamer : public HexagonTargetStreamer {
@@ -137,7 +137,7 @@ public:
         Symbol, Size, ByteAlignment, AccessSize);
   }
 };
-} // namespace
+}
 
 static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
                                          const Triple &TT) {
@@ -172,9 +172,10 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
     return nullptr;
 }
 
-MCTargetStreamer *createMCAsmTargetStreamer(
-      MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint,
-      bool IsVerboseAsm) {
+static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S,
+                                                   formatted_raw_ostream &OS,
+                                                   MCInstPrinter *InstPrint,
+                                                   bool IsVerboseAsm) {
   return new HexagonTargetAsmStreamer(S,  OS, IsVerboseAsm, *InstPrint);
 }
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 89c3eb3..cb62650 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -49,7 +49,7 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T,
 MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
                                              uint8_t OSABI, StringRef CPU);
 
-} // namespace llvm
+} // End llvm namespace
 
 // Define symbolic names for Hexagon registers.  This defines a mapping from
 // register name to register number.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index feaaa4f..41112ac 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -81,6 +81,9 @@ unsigned HexagonResource::setWeight(unsigned s) {
   const unsigned MaskWeight = SlotWeight - 1;
   bool Key = (1 << s) & getUnits();
 
+  // TODO: Improve this API so that we can prevent misuse statically.
+  assert(SlotWeight * s < 32 && "Argument to setWeight too large.");
+
   // Calculate relative weight of the insn for the given slot, weighing it the
   // heavier the more restrictive the insn is and the lowest the slots that the
   // insn may be executed in.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 53325f6..8b6c72e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -34,8 +34,7 @@ public:
   HexagonResource(unsigned s) { setUnits(s); };
 
   void setUnits(unsigned s) {
-    Slots = s & ~(-1 << HEXAGON_PACKET_SIZE);
-    setWeight(s);
+    Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
   };
   unsigned setWeight(unsigned s);
 
@@ -134,6 +133,6 @@ public:
   void setError(unsigned Err) { Error = Err; };
   unsigned getError() const { return (Error); };
 };
-} // namespace llvm
+}
 
 #endif // HEXAGONSHUFFLER_H
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index ab82324..f05d7a4 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -31,6 +31,7 @@ subdirectories =
  PowerPC
  Sparc
  SystemZ
+ WebAssembly
  X86
  XCore
 
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 80565aa..70141a9 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -40,6 +40,6 @@ namespace llvm {
     void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index 302012e..796f252 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -30,7 +30,7 @@ namespace MSP430CC {
 
     COND_INVALID = -1
   };
-} // namespace MSP430CC
+}
 
 namespace llvm {
   class MSP430TargetMachine;
@@ -42,6 +42,6 @@ namespace llvm {
 
   FunctionPass *createMSP430BranchSelectionPass();
 
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 2bc11c0..ffcf222 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -44,7 +44,7 @@ namespace {
     }
   };
   char MSP430BSel::ID = 0;
-} // namespace
+}
 
 /// createMSP430BranchSelectionPass - returns an instance of the Branch
 /// Selection Pass
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index 2f20bbd..48c4dc86 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -49,6 +49,6 @@ public:
                                      RegScavenger *RS = nullptr) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index a60108d..5ce5013 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -85,7 +85,7 @@ namespace {
         errs() << " JT" << JT << " Align" << Align << '\n';
     }
   };
-} // namespace
+}
 
 /// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
 /// instructions for SelectionDAG operations.
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index b090609..80d3ae1 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -64,7 +64,7 @@ namespace llvm {
       /// SHL, SRA, SRL - Non-constant shifts.
       SHL, SRA, SRL
     };
-  } // namespace MSP430ISD
+  }
 
   class MSP430Subtarget;
   class MSP430TargetLowering : public TargetLowering {
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index c6bad1e..3cf3b1b 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -38,7 +38,7 @@ namespace MSP430II {
     Size4Bytes  = 3 << SizeShift,
     Size6Bytes  = 4 << SizeShift
   };
-} // namespace MSP430II
+}
 
 class MSP430InstrInfo : public MSP430GenInstrInfo {
   const MSP430RegisterInfo RI;
@@ -87,6 +87,6 @@ public:
 
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index ebbc6e5..ebd6397 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -42,6 +42,6 @@ public:
   MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 3d1a245..fcc5f5b 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -49,6 +49,6 @@ public:
   void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index 95c9293..61a6b19 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
   ~MSP430SelectionDAGInfo();
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 958a5d3..81f6f02 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -64,6 +64,6 @@ public:
     return &TSInfo;
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif  // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 5b8d633..f14156d 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -113,6 +113,7 @@ class MipsAsmParser : public MCTargetAsmParser {
                        // nullptr, which indicates that no function is currently
                        // selected. This usually happens after an '.end func'
                        // directive.
+  bool IsLittleEndian;
 
   // Print a warning along with its fix-it message at the given range.
   void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
@@ -214,11 +215,17 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool expandCondBranches(MCInst &Inst, SMLoc IDLoc,
                           SmallVectorImpl<MCInst> &Instructions);
 
+  bool expandUlhu(MCInst &Inst, SMLoc IDLoc,
+                  SmallVectorImpl<MCInst> &Instructions);
+
+  bool expandUlw(MCInst &Inst, SMLoc IDLoc,
+                 SmallVectorImpl<MCInst> &Instructions);
+
   void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
                  SmallVectorImpl<MCInst> &Instructions);
 
   void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg,
-                  SmallVectorImpl<MCInst> &Instructions);
+                  bool Is64Bit, SmallVectorImpl<MCInst> &Instructions);
 
   bool reportParseError(Twine ErrorMsg);
   bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -251,6 +258,8 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool parseSetMips16Directive();
   bool parseSetNoMips16Directive();
   bool parseSetFpDirective();
+  bool parseSetOddSPRegDirective();
+  bool parseSetNoOddSPRegDirective();
   bool parseSetPopDirective();
   bool parseSetPushDirective();
   bool parseSetSoftFloatDirective();
@@ -352,6 +361,16 @@ class MipsAsmParser : public MCTargetAsmParser {
     }
   }
 
+  void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    setFeatureBits(Feature, FeatureString);
+    AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+  }
+
+  void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
+    clearFeatureBits(Feature, FeatureString);
+    AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+  }
+
 public:
   enum MipsMatchResultTy {
     Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY
@@ -387,6 +406,13 @@ public:
       report_fatal_error("-mno-odd-spreg requires the O32 ABI");
 
     CurrentFn = nullptr;
+
+    Triple TheTriple(sti.getTargetTriple());
+    if ((TheTriple.getArch() == Triple::mips) ||
+        (TheTriple.getArch() == Triple::mips64))
+      IsLittleEndian = false;
+    else
+      IsLittleEndian = true;
   }
 
   /// True if all of $fcc0 - $fcc7 exist for the current ISA.
@@ -462,6 +488,8 @@ public:
   void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc);
 
   void warnIfNoMacro(SMLoc Loc);
+
+  bool isLittle() const { return IsLittleEndian; }
 };
 }
 
@@ -486,11 +514,11 @@ public:
     RegKind_CCR = 128,    /// CCR
     RegKind_HWRegs = 256, /// HWRegs
     RegKind_COP3 = 512,   /// COP3
-
+    RegKind_COP0 = 1024,  /// COP0
     /// Potentially any (e.g. $1)
     RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCC | RegKind_MSA128 |
                       RegKind_MSACtrl | RegKind_COP2 | RegKind_ACC |
-                      RegKind_CCR | RegKind_HWRegs | RegKind_COP3
+                      RegKind_CCR | RegKind_HWRegs | RegKind_COP3 | RegKind_COP0
   };
 
 private:
@@ -652,6 +680,14 @@ private:
     return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
   }
 
+  /// Coerce the register to COP0 and return the real register for the
+  /// current target.
+  unsigned getCOP0Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_COP0) && "Invalid access!");
+    unsigned ClassID = Mips::COP0RegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+  }
+
   /// Coerce the register to COP2 and return the real register for the
   /// current target.
   unsigned getCOP2Reg() const {
@@ -793,6 +829,11 @@ public:
     Inst.addOperand(MCOperand::createReg(getMSACtrlReg()));
   }
 
+  void addCOP0AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getCOP0Reg()));
+  }
+
   void addCOP2AsmRegOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getCOP2Reg()));
@@ -1168,6 +1209,9 @@ public:
   bool isACCAsmReg() const {
     return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3;
   }
+  bool isCOP0AsmReg() const {
+    return isRegIdx() && RegIdx.Kind & RegKind_COP0 && RegIdx.Index <= 31;
+  }
   bool isCOP2AsmReg() const {
     return isRegIdx() && RegIdx.Kind & RegKind_COP2 && RegIdx.Index <= 31;
   }
@@ -1635,6 +1679,8 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
   case Mips::BLEU:
   case Mips::BGEU:
   case Mips::BGTU:
+  case Mips::Ulhu:
+  case Mips::Ulw:
     return true;
   default:
     return false;
@@ -1673,6 +1719,10 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
   case Mips::BGEU:
   case Mips::BGTU:
     return expandCondBranches(Inst, IDLoc, Instructions);
+  case Mips::Ulhu:
+    return expandUlhu(Inst, IDLoc, Instructions);
+  case Mips::Ulw:
+    return expandUlw(Inst, IDLoc, Instructions);
   }
 }
 
@@ -1774,6 +1824,16 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
 
   MCInst tmpInst;
 
+  unsigned TmpReg = DstReg;
+  if (UseSrcReg && (DstReg == SrcReg)) {
+    // At this point we need AT to perform the expansions and we exit if it is
+    // not available.
+    unsigned ATReg = getATReg(IDLoc);
+    if (!ATReg)
+      return true;
+    TmpReg = ATReg;
+  }
+
   tmpInst.setLoc(IDLoc);
   // FIXME: gas has a special case for values that are 000...1111, which
   // becomes a li -1 and then a dsrl
@@ -1810,23 +1870,23 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
       // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the
       // upper 32 bits.
       tmpInst.setOpcode(Mips::ORi);
-      tmpInst.addOperand(MCOperand::createReg(DstReg));
+      tmpInst.addOperand(MCOperand::createReg(TmpReg));
       tmpInst.addOperand(MCOperand::createReg(Mips::ZERO));
       tmpInst.addOperand(MCOperand::createImm(Bits31To16));
       tmpInst.setLoc(IDLoc);
       Instructions.push_back(tmpInst);
       // Move the value to the upper 16 bits by doing a 16-bit left shift.
-      createLShiftOri<16>(0, DstReg, IDLoc, Instructions);
+      createLShiftOri<16>(0, TmpReg, IDLoc, Instructions);
     } else {
       tmpInst.setOpcode(Mips::LUi);
-      tmpInst.addOperand(MCOperand::createReg(DstReg));
+      tmpInst.addOperand(MCOperand::createReg(TmpReg));
       tmpInst.addOperand(MCOperand::createImm(Bits31To16));
       Instructions.push_back(tmpInst);
     }
-    createLShiftOri<0>(Bits15To0, DstReg, IDLoc, Instructions);
+    createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions);
 
     if (UseSrcReg)
-      createAddu(DstReg, DstReg, SrcReg, Instructions);
+      createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
 
   } else if ((ImmValue & (0xffffLL << 48)) == 0) {
     if (Is32BitImm) {
@@ -1853,14 +1913,14 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
     uint16_t Bits15To0 = ImmValue & 0xffff;
 
     tmpInst.setOpcode(Mips::LUi);
-    tmpInst.addOperand(MCOperand::createReg(DstReg));
+    tmpInst.addOperand(MCOperand::createReg(TmpReg));
     tmpInst.addOperand(MCOperand::createImm(Bits47To32));
     Instructions.push_back(tmpInst);
-    createLShiftOri<0>(Bits31To16, DstReg, IDLoc, Instructions);
-    createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions);
+    createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions);
+    createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
 
     if (UseSrcReg)
-      createAddu(DstReg, DstReg, SrcReg, Instructions);
+      createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
 
   } else {
     if (Is32BitImm) {
@@ -1889,22 +1949,22 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
     uint16_t Bits15To0 = ImmValue & 0xffff;
 
     tmpInst.setOpcode(Mips::LUi);
-    tmpInst.addOperand(MCOperand::createReg(DstReg));
+    tmpInst.addOperand(MCOperand::createReg(TmpReg));
     tmpInst.addOperand(MCOperand::createImm(Bits63To48));
     Instructions.push_back(tmpInst);
-    createLShiftOri<0>(Bits47To32, DstReg, IDLoc, Instructions);
+    createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions);
 
     // When Bits31To16 is 0, do a left shift of 32 bits instead of doing
     // two left shifts of 16 bits.
     if (Bits31To16 == 0) {
-      createLShiftOri<32>(Bits15To0, DstReg, IDLoc, Instructions);
+      createLShiftOri<32>(Bits15To0, TmpReg, IDLoc, Instructions);
     } else {
-      createLShiftOri<16>(Bits31To16, DstReg, IDLoc, Instructions);
-      createLShiftOri<16>(Bits15To0, DstReg, IDLoc, Instructions);
+      createLShiftOri<16>(Bits31To16, TmpReg, IDLoc, Instructions);
+      createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
     }
 
     if (UseSrcReg)
-      createAddu(DstReg, DstReg, SrcReg, Instructions);
+      createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
   }
   return false;
 }
@@ -1991,6 +2051,18 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
   const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
       &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
 
+  bool UseSrcReg = SrcReg != Mips::NoRegister;
+
+  unsigned TmpReg = DstReg;
+  if (UseSrcReg && (DstReg == SrcReg)) {
+    // At this point we need AT to perform the expansions and we exit if it is
+    // not available.
+    unsigned ATReg = getATReg(IDLoc);
+    if (!ATReg)
+      return true;
+    TmpReg = ATReg;
+  }
+
   if (!Is32BitSym) {
     // If it's a 64-bit architecture, expand to:
     // la d,sym => lui  d,highest(sym)
@@ -2005,31 +2077,31 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
         &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
 
     tmpInst.setOpcode(Mips::LUi);
-    tmpInst.addOperand(MCOperand::createReg(DstReg));
+    tmpInst.addOperand(MCOperand::createReg(TmpReg));
     tmpInst.addOperand(MCOperand::createExpr(HighestExpr));
     Instructions.push_back(tmpInst);
 
-    createLShiftOri<0>(MCOperand::createExpr(HigherExpr), DstReg, SMLoc(),
+    createLShiftOri<0>(MCOperand::createExpr(HigherExpr), TmpReg, SMLoc(),
                        Instructions);
-    createLShiftOri<16>(MCOperand::createExpr(HiExpr), DstReg, SMLoc(),
+    createLShiftOri<16>(MCOperand::createExpr(HiExpr), TmpReg, SMLoc(),
                         Instructions);
-    createLShiftOri<16>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(),
+    createLShiftOri<16>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(),
                         Instructions);
   } else {
     // Otherwise, expand to:
     // la d,sym => lui  d,hi16(sym)
     //             ori  d,d,lo16(sym)
     tmpInst.setOpcode(Mips::LUi);
-    tmpInst.addOperand(MCOperand::createReg(DstReg));
+    tmpInst.addOperand(MCOperand::createReg(TmpReg));
     tmpInst.addOperand(MCOperand::createExpr(HiExpr));
     Instructions.push_back(tmpInst);
 
-    createLShiftOri<0>(MCOperand::createExpr(LoExpr), DstReg, SMLoc(),
+    createLShiftOri<0>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(),
                        Instructions);
   }
 
-  if (SrcReg != Mips::NoRegister)
-    createAddu(DstReg, DstReg, SrcReg, Instructions);
+  if (UseSrcReg)
+    createAddu(DstReg, TmpReg, SrcReg, !Is32BitSym, Instructions);
 
   return false;
 }
@@ -2449,6 +2521,174 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
   return false;
 }
 
+bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
+                               SmallVectorImpl<MCInst> &Instructions) {
+  if (hasMips32r6() || hasMips64r6()) {
+    Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
+    return false;
+  }
+
+  warnIfNoMacro(IDLoc);
+
+  const MCOperand &DstRegOp = Inst.getOperand(0);
+  assert(DstRegOp.isReg() && "expected register operand kind");
+
+  const MCOperand &SrcRegOp = Inst.getOperand(1);
+  assert(SrcRegOp.isReg() && "expected register operand kind");
+
+  const MCOperand &OffsetImmOp = Inst.getOperand(2);
+  assert(OffsetImmOp.isImm() && "expected immediate operand kind");
+
+  unsigned DstReg = DstRegOp.getReg();
+  unsigned SrcReg = SrcRegOp.getReg();
+  int64_t OffsetValue = OffsetImmOp.getImm();
+
+  // NOTE: We always need AT for ULHU, as it is always used as the source
+  // register for one of the LBu's.
+  unsigned ATReg = getATReg(IDLoc);
+  if (!ATReg)
+    return true;
+
+  // When the value of offset+1 does not fit in 16 bits, we have to load the
+  // offset in AT, (D)ADDu the original source register (if there was one), and
+  // then use AT as the source register for the 2 generated LBu's.
+  bool LoadedOffsetInAT = false;
+  if (!isInt<16>(OffsetValue + 1) || !isInt<16>(OffsetValue)) {
+    LoadedOffsetInAT = true;
+
+    if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
+                      IDLoc, Instructions))
+      return true;
+
+    // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
+    // because it will make our output more similar to GAS'. For example,
+    // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9",
+    // instead of just an "ori $1, $9, 32768".
+    // NOTE: If there is no source register specified in the ULHU, the parser
+    // will interpret it as $0.
+    if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64)
+      createAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), Instructions);
+  }
+
+  unsigned FirstLbuDstReg = LoadedOffsetInAT ? DstReg : ATReg;
+  unsigned SecondLbuDstReg = LoadedOffsetInAT ? ATReg : DstReg;
+  unsigned LbuSrcReg = LoadedOffsetInAT ? ATReg : SrcReg;
+
+  int64_t FirstLbuOffset = 0, SecondLbuOffset = 0;
+  if (isLittle()) {
+    FirstLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1);
+    SecondLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue;
+  } else {
+    FirstLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue;
+    SecondLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1);
+  }
+
+  unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg;
+
+  MCInst TmpInst;
+  TmpInst.setOpcode(Mips::LBu);
+  TmpInst.addOperand(MCOperand::createReg(FirstLbuDstReg));
+  TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
+  TmpInst.addOperand(MCOperand::createImm(FirstLbuOffset));
+  Instructions.push_back(TmpInst);
+
+  TmpInst.clear();
+  TmpInst.setOpcode(Mips::LBu);
+  TmpInst.addOperand(MCOperand::createReg(SecondLbuDstReg));
+  TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
+  TmpInst.addOperand(MCOperand::createImm(SecondLbuOffset));
+  Instructions.push_back(TmpInst);
+
+  TmpInst.clear();
+  TmpInst.setOpcode(Mips::SLL);
+  TmpInst.addOperand(MCOperand::createReg(SllReg));
+  TmpInst.addOperand(MCOperand::createReg(SllReg));
+  TmpInst.addOperand(MCOperand::createImm(8));
+  Instructions.push_back(TmpInst);
+
+  TmpInst.clear();
+  TmpInst.setOpcode(Mips::OR);
+  TmpInst.addOperand(MCOperand::createReg(DstReg));
+  TmpInst.addOperand(MCOperand::createReg(DstReg));
+  TmpInst.addOperand(MCOperand::createReg(ATReg));
+  Instructions.push_back(TmpInst);
+
+  return false;
+}
+
+bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc,
+                              SmallVectorImpl<MCInst> &Instructions) {
+  if (hasMips32r6() || hasMips64r6()) {
+    Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
+    return false;
+  }
+
+  const MCOperand &DstRegOp = Inst.getOperand(0);
+  assert(DstRegOp.isReg() && "expected register operand kind");
+
+  const MCOperand &SrcRegOp = Inst.getOperand(1);
+  assert(SrcRegOp.isReg() && "expected register operand kind");
+
+  const MCOperand &OffsetImmOp = Inst.getOperand(2);
+  assert(OffsetImmOp.isImm() && "expected immediate operand kind");
+
+  unsigned SrcReg = SrcRegOp.getReg();
+  int64_t OffsetValue = OffsetImmOp.getImm();
+  unsigned ATReg = 0;
+
+  // When the value of offset+3 does not fit in 16 bits, we have to load the
+  // offset in AT, (D)ADDu the original source register (if there was one), and
+  // then use AT as the source register for the generated LWL and LWR.
+  bool LoadedOffsetInAT = false;
+  if (!isInt<16>(OffsetValue + 3) || !isInt<16>(OffsetValue)) {
+    ATReg = getATReg(IDLoc);
+    if (!ATReg)
+      return true;
+    LoadedOffsetInAT = true;
+
+    warnIfNoMacro(IDLoc);
+
+    if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
+                      IDLoc, Instructions))
+      return true;
+
+    // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
+    // because it will make our output more similar to GAS'. For example,
+    // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9",
+    // instead of just an "ori $1, $9, 32768".
+    // NOTE: If there is no source register specified in the ULW, the parser
+    // will interpret it as $0.
+    if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64)
+      createAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), Instructions);
+  }
+
+  unsigned FinalSrcReg = LoadedOffsetInAT ? ATReg : SrcReg;
+  int64_t LeftLoadOffset = 0, RightLoadOffset  = 0;
+  if (isLittle()) {
+    LeftLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
+    RightLoadOffset  = LoadedOffsetInAT ? 0 : OffsetValue;
+  } else {
+    LeftLoadOffset = LoadedOffsetInAT ? 0 : OffsetValue;
+    RightLoadOffset  = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
+  }
+
+  MCInst LeftLoadInst;
+  LeftLoadInst.setOpcode(Mips::LWL);
+  LeftLoadInst.addOperand(DstRegOp);
+  LeftLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
+  LeftLoadInst.addOperand(MCOperand::createImm(LeftLoadOffset));
+  Instructions.push_back(LeftLoadInst);
+
+  MCInst RightLoadInst;
+  RightLoadInst.setOpcode(Mips::LWR);
+  RightLoadInst.addOperand(DstRegOp);
+  RightLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
+  RightLoadInst.addOperand(MCOperand::createImm(RightLoadOffset ));
+  Instructions.push_back(RightLoadInst);
+
+  return false;
+}
+
 void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
                               SmallVectorImpl<MCInst> &Instructions) {
   MCInst NopInst;
@@ -2466,10 +2706,10 @@ void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
 }
 
 void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
-                               unsigned TrgReg,
+                               unsigned TrgReg, bool Is64Bit,
                                SmallVectorImpl<MCInst> &Instructions) {
   MCInst AdduInst;
-  AdduInst.setOpcode(Mips::ADDu);
+  AdduInst.setOpcode(Is64Bit ? Mips::DADDu : Mips::ADDu);
   AdduInst.addOperand(MCOperand::createReg(DstReg));
   AdduInst.addOperand(MCOperand::createReg(SrcReg));
   AdduInst.addOperand(MCOperand::createReg(TrgReg));
@@ -2972,9 +3212,12 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
   MCAsmParser &Parser = getParser();
   SMLoc S;
   bool Result = true;
+  unsigned NumOfLParen = 0;
 
-  while (getLexer().getKind() == AsmToken::LParen)
+  while (getLexer().getKind() == AsmToken::LParen) {
     Parser.Lex();
+    ++NumOfLParen;
+  }
 
   switch (getLexer().getKind()) {
   default:
@@ -2985,7 +3228,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
   case AsmToken::Minus:
   case AsmToken::Plus:
     if (isParenExpr)
-      Result = getParser().parseParenExpression(Res, S);
+      Result = getParser().parseParenExprOfDepth(NumOfLParen, Res, S);
     else
       Result = (getParser().parseExpression(Res));
     while (getLexer().getKind() == AsmToken::RParen)
@@ -3867,6 +4110,34 @@ bool MipsAsmParser::parseSetFpDirective() {
   return false;
 }
 
+bool MipsAsmParser::parseSetOddSPRegDirective() {
+  MCAsmParser &Parser = getParser();
+
+  Parser.Lex(); // Eat "oddspreg".
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    reportParseError("unexpected token, expected end of statement");
+    return false;
+  }
+
+  clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+  getTargetStreamer().emitDirectiveSetOddSPReg();
+  return false;
+}
+
+bool MipsAsmParser::parseSetNoOddSPRegDirective() {
+  MCAsmParser &Parser = getParser();
+
+  Parser.Lex(); // Eat "nooddspreg".
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    reportParseError("unexpected token, expected end of statement");
+    return false;
+  }
+
+  setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+  getTargetStreamer().emitDirectiveSetNoOddSPReg();
+  return false;
+}
+
 bool MipsAsmParser::parseSetPopDirective() {
   MCAsmParser &Parser = getParser();
   SMLoc Loc = getLexer().getLoc();
@@ -4229,6 +4500,10 @@ bool MipsAsmParser::parseDirectiveSet() {
     return parseSetArchDirective();
   } else if (Tok.getString() == "fp") {
     return parseSetFpDirective();
+  } else if (Tok.getString() == "oddspreg") {
+    return parseSetOddSPRegDirective();
+  } else if (Tok.getString() == "nooddspreg") {
+    return parseSetNoOddSPRegDirective();
   } else if (Tok.getString() == "pop") {
     return parseSetPopDirective();
   } else if (Tok.getString() == "push") {
@@ -4428,6 +4703,8 @@ bool MipsAsmParser::parseInsnDirective() {
 ///  ::= .module oddspreg
 ///  ::= .module nooddspreg
 ///  ::= .module fp=value
+///  ::= .module softfloat
+///  ::= .module hardfloat
 bool MipsAsmParser::parseDirectiveModule() {
   MCAsmParser &Parser = getParser();
   MCAsmLexer &Lexer = getLexer();
@@ -4446,8 +4723,16 @@ bool MipsAsmParser::parseDirectiveModule() {
   }
 
   if (Option == "oddspreg") {
-    getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
-    clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+    clearModuleFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+    // Synchronize the abiflags information with the FeatureBits information we
+    // changed above.
+    getTargetStreamer().updateABIInfo(*this);
+
+    // If printing assembly, use the recently updated abiflags information.
+    // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+    // emitted at the end).
+    getTargetStreamer().emitDirectiveModuleOddSPReg();
 
     // If this is not the end of the statement, report an error.
     if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -4462,8 +4747,16 @@ bool MipsAsmParser::parseDirectiveModule() {
       return false;
     }
 
-    getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
-    setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+    setModuleFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+    // Synchronize the abiflags information with the FeatureBits information we
+    // changed above.
+    getTargetStreamer().updateABIInfo(*this);
+
+    // If printing assembly, use the recently updated abiflags information.
+    // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+    // emitted at the end).
+    getTargetStreamer().emitDirectiveModuleOddSPReg();
 
     // If this is not the end of the statement, report an error.
     if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -4474,6 +4767,44 @@ bool MipsAsmParser::parseDirectiveModule() {
     return false; // parseDirectiveModule has finished successfully.
   } else if (Option == "fp") {
     return parseDirectiveModuleFP();
+  } else if (Option == "softfloat") {
+    setModuleFeatureBits(Mips::FeatureSoftFloat, "soft-float");
+
+    // Synchronize the ABI Flags information with the FeatureBits information we
+    // updated above.
+    getTargetStreamer().updateABIInfo(*this);
+
+    // If printing assembly, use the recently updated ABI Flags information.
+    // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+    // emitted later).
+    getTargetStreamer().emitDirectiveModuleSoftFloat();
+
+    // If this is not the end of the statement, report an error.
+    if (getLexer().isNot(AsmToken::EndOfStatement)) {
+      reportParseError("unexpected token, expected end of statement");
+      return false;
+    }
+
+    return false; // parseDirectiveModule has finished successfully.
+  } else if (Option == "hardfloat") {
+    clearModuleFeatureBits(Mips::FeatureSoftFloat, "soft-float");
+
+    // Synchronize the ABI Flags information with the FeatureBits information we
+    // updated above.
+    getTargetStreamer().updateABIInfo(*this);
+
+    // If printing assembly, use the recently updated ABI Flags information.
+    // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+    // emitted later).
+    getTargetStreamer().emitDirectiveModuleHardFloat();
+
+    // If this is not the end of the statement, report an error.
+    if (getLexer().isNot(AsmToken::EndOfStatement)) {
+      reportParseError("unexpected token, expected end of statement");
+      return false;
+    }
+
+    return false; // parseDirectiveModule has finished successfully.
   } else {
     return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
   }
@@ -4502,8 +4833,15 @@ bool MipsAsmParser::parseDirectiveModuleFP() {
     return false;
   }
 
-  // Emit appropriate flags.
-  getTargetStreamer().emitDirectiveModuleFP(FpABI, isABI_O32());
+  // Synchronize the abiflags information with the FeatureBits information we
+  // changed above.
+  getTargetStreamer().updateABIInfo(*this);
+
+  // If printing assembly, use the recently updated abiflags information.
+  // If generating ELF, don't do anything (the .MIPS.abiflags section gets
+  // emitted at the end).
+  getTargetStreamer().emitDirectiveModuleFP();
+
   Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
@@ -4512,6 +4850,7 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
                                     StringRef Directive) {
   MCAsmParser &Parser = getParser();
   MCAsmLexer &Lexer = getLexer();
+  bool ModuleLevelOptions = Directive == ".module";
 
   if (Lexer.is(AsmToken::Identifier)) {
     StringRef Value = Parser.getTok().getString();
@@ -4528,6 +4867,13 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
     }
 
     FpABI = MipsABIFlagsSection::FpABIKind::XX;
+    if (ModuleLevelOptions) {
+      setModuleFeatureBits(Mips::FeatureFPXX, "fpxx");
+      clearModuleFeatureBits(Mips::FeatureFP64Bit, "fp64");
+    } else {
+      setFeatureBits(Mips::FeatureFPXX, "fpxx");
+      clearFeatureBits(Mips::FeatureFP64Bit, "fp64");
+    }
     return true;
   }
 
@@ -4547,8 +4893,23 @@ bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
       }
 
       FpABI = MipsABIFlagsSection::FpABIKind::S32;
-    } else
+      if (ModuleLevelOptions) {
+        clearModuleFeatureBits(Mips::FeatureFPXX, "fpxx");
+        clearModuleFeatureBits(Mips::FeatureFP64Bit, "fp64");
+      } else {
+        clearFeatureBits(Mips::FeatureFPXX, "fpxx");
+        clearFeatureBits(Mips::FeatureFP64Bit, "fp64");
+      }
+    } else {
       FpABI = MipsABIFlagsSection::FpABIKind::S64;
+      if (ModuleLevelOptions) {
+        clearModuleFeatureBits(Mips::FeatureFPXX, "fpxx");
+        setModuleFeatureBits(Mips::FeatureFP64Bit, "fp64");
+      } else {
+        clearFeatureBits(Mips::FeatureFPXX, "fpxx");
+        setFeatureBits(Mips::FeatureFP64Bit, "fp64");
+      }
+    }
 
     return true;
   }
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index c8629b5..a34ba3b 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -178,6 +178,11 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
                                                uint64_t Address,
                                                const void *Decoder);
 
+static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst,
+                                            unsigned RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder);
+
 static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
                                             unsigned RegNo,
                                             uint64_t Address,
@@ -1564,6 +1569,18 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeCOP0RegisterClass(MCInst &Inst,
+                                            unsigned RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+
+  unsigned Reg = getReg(Decoder, Mips::COP0RegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
                                             unsigned RegNo,
                                             uint64_t Address,
@@ -1855,6 +1872,6 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeSimm23Lsl2(MCInst &Inst, unsigned Insn,
                                      uint64_t Address, const void *Decoder) {
-  Inst.addOperand(MCOperand::createImm(SignExtend32<23>(Insn) << 2));
+  Inst.addOperand(MCOperand::createImm(SignExtend32<25>(Insn << 2)));
   return MCDisassembler::Success;
 }
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 725ea7f..70b9cca 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -66,4 +66,4 @@ MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
   OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4);          // flags2
   return OS;
 }
-} // namespace llvm
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index bf306ee..b078cd3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -186,6 +186,6 @@ public:
 };
 
 MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection);
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index aa965e82a..40c5681 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -73,6 +73,6 @@ public:
 
   unsigned GetEhDataReg(unsigned I) const;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 5c746b2..328e717 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -59,10 +59,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
   case Mips::fixup_MIPS_PCLO16:
     break;
   case Mips::fixup_Mips_PC16:
-    // So far we are only using this type for branches.
-    // For branches we start 1 instruction after the branch
-    // so the displacement will be one instruction size less.
-    Value -= 4;
     // The displacement is then divided by 4 to give us an 18 bit
     // address range. Forcing a signed division because Value can be negative.
     Value = (int64_t)Value / 4;
@@ -135,7 +131,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
       Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup");
     break;
   case Mips::fixup_MIPS_PC21_S2:
-    Value -= 4;
     // Forcing a signed division because Value can be negative.
     Value = (int64_t) Value / 4;
     // We now check if Value can be encoded as a 21-bit signed immediate.
@@ -143,7 +138,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
       Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup");
     break;
   case Mips::fixup_MIPS_PC26_S2:
-    Value -= 4;
     // Forcing a signed division because Value can be negative.
     Value = (int64_t) Value / 4;
     // We now check if Value can be encoded as a 26-bit signed immediate.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index fe84e40..b3d5a49 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -87,6 +87,6 @@ public:
 
 }; // class MipsAsmBackend
 
-} // namespace llvm
+} // namespace
 
 #endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index a7d5a1e..ff7779e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -119,7 +119,7 @@ namespace MipsII {
 
     FormMask = 15
   };
-} // namespace MipsII
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index a45e2ad..9b29527 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -51,7 +51,7 @@ struct MipsRelocationEntry {
     virtual void sortRelocs(const MCAssembler &Asm,
                             std::vector<ELFRelocationEntry> &Relocs) override;
   };
-} // namespace
+}
 
 MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
                                          bool _isN64, bool IsLittleEndian)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 93925bf..e36263d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -226,8 +226,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
   assert(MO.isExpr() &&
          "getBranchTargetOpValue expects only expressions or immediates");
 
-  const MCExpr *Expr = MO.getExpr();
-  Fixups.push_back(MCFixup::create(0, Expr,
+  const MCExpr *FixupExpression = MCBinaryExpr::createAdd(
+      MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx);
+  Fixups.push_back(MCFixup::create(0, FixupExpression,
                                    MCFixupKind(Mips::fixup_Mips_PC16)));
   return 0;
 }
@@ -315,8 +316,9 @@ getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo,
   assert(MO.isExpr() &&
          "getBranchTarget21OpValue expects only expressions or immediates");
 
-  const MCExpr *Expr = MO.getExpr();
-  Fixups.push_back(MCFixup::create(0, Expr,
+  const MCExpr *FixupExpression = MCBinaryExpr::createAdd(
+      MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx);
+  Fixups.push_back(MCFixup::create(0, FixupExpression,
                                    MCFixupKind(Mips::fixup_MIPS_PC21_S2)));
   return 0;
 }
@@ -337,8 +339,9 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
   assert(MO.isExpr() &&
          "getBranchTarget26OpValue expects only expressions or immediates");
 
-  const MCExpr *Expr = MO.getExpr();
-  Fixups.push_back(MCFixup::create(0, Expr,
+  const MCExpr *FixupExpression = MCBinaryExpr::createAdd(
+      MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx);
+  Fixups.push_back(MCFixup::create(0, FixupExpression,
                                    MCFixupKind(Mips::fixup_MIPS_PC26_S2)));
   return 0;
 }
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 81a0a98..687b800 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -25,6 +25,6 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
 MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
                                          raw_pwrite_stream &OS,
                                          MCCodeEmitter *Emitter, bool RelaxAll);
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 20358a0..4069d7d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -62,7 +62,7 @@ namespace MIPS_MC {
 StringRef selectMipsCPU(const Triple &TT, StringRef CPU);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 // Defines symbolic names for Mips registers.  This defines a mapping from
 // register name to register number.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 5378675..aef9bd3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -265,4 +265,4 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
   return S;
 }
 
-} // namespace llvm
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 4911632..24b6028 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -79,6 +79,9 @@ void MipsRegInfoRecord::SetPhysRegUsed(unsigned Reg,
     if (GPR32RegClass->contains(CurrentSubReg) ||
         GPR64RegClass->contains(CurrentSubReg))
       ri_gprmask |= Value;
+    else if (COP0RegClass->contains(CurrentSubReg))
+      ri_cprmask[0] |= Value;
+    // MIPS COP1 is the FPU.
     else if (FGR32RegClass->contains(CurrentSubReg) ||
              FGR64RegClass->contains(CurrentSubReg) ||
              AFGR64RegClass->contains(CurrentSubReg) ||
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index a051f4c..e4da2df 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -92,15 +92,23 @@ void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
 void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                                               const MCSymbol &Sym, bool IsReg) {
 }
-void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
-                                                     bool IsO32ABI) {
-  if (!Enabled && !IsO32ABI)
+
+void MipsTargetStreamer::emitDirectiveModuleFP() {}
+
+void MipsTargetStreamer::emitDirectiveModuleOddSPReg() {
+  if (!ABIFlagsSection.OddSPReg && !ABIFlagsSection.Is32BitABI)
     report_fatal_error("+nooddspreg is only valid for O32");
 }
+void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {}
+void MipsTargetStreamer::emitDirectiveModuleHardFloat() {}
 void MipsTargetStreamer::emitDirectiveSetFp(
     MipsABIFlagsSection::FpABIKind Value) {
   forbidModuleDirective();
 }
+void MipsTargetStreamer::emitDirectiveSetOddSPReg() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetNoOddSPReg() {
+  forbidModuleDirective();
+}
 
 MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
                                              formatted_raw_ostream &OS)
@@ -369,12 +377,9 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
   forbidModuleDirective();
 }
 
-void MipsTargetAsmStreamer::emitDirectiveModuleFP(
-    MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) {
-  MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI);
-
+void MipsTargetAsmStreamer::emitDirectiveModuleFP() {
   OS << "\t.module\tfp=";
-  OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+  OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n";
 }
 
 void MipsTargetAsmStreamer::emitDirectiveSetFp(
@@ -385,11 +390,28 @@ void MipsTargetAsmStreamer::emitDirectiveSetFp(
   OS << ABIFlagsSection.getFpABIString(Value) << "\n";
 }
 
-void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
-                                                        bool IsO32ABI) {
-  MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
+void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg() {
+  MipsTargetStreamer::emitDirectiveModuleOddSPReg();
+
+  OS << "\t.module\t" << (ABIFlagsSection.OddSPReg ? "" : "no") << "oddspreg\n";
+}
 
-  OS << "\t.module\t" << (Enabled ? "" : "no") << "oddspreg\n";
+void MipsTargetAsmStreamer::emitDirectiveSetOddSPReg() {
+  MipsTargetStreamer::emitDirectiveSetOddSPReg();
+  OS << "\t.set\toddspreg\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetNoOddSPReg() {
+  MipsTargetStreamer::emitDirectiveSetNoOddSPReg();
+  OS << "\t.set\tnooddspreg\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleSoftFloat() {
+  OS << "\t.module\tsoftfloat\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() {
+  OS << "\t.module\thardfloat\n";
 }
 
 // This part is for ELF object output.
@@ -800,10 +822,3 @@ void MipsTargetELFStreamer::emitMipsAbiFlags() {
 
   OS << ABIFlagsSection;
 }
-
-void MipsTargetELFStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
-                                                        bool IsO32ABI) {
-  MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
-
-  ABIFlagsSection.OddSPReg = Enabled;
-}
diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 78ba76d..187a022 100644
--- a/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -240,3 +240,50 @@ class ERETNC_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
   let Inst{15-6}  = 0x3cd;
   let Inst{5-0}   = 0x3c;
 }
+
+class BREAK_MMR6_ENC<string instr_asm> : MMR6Arch<instr_asm> {
+  bits<10> code_1;
+  bits<10> code_2;
+  bits<32> Inst;
+  let Inst{31-26} = 0x0;
+  let Inst{25-16} = code_1;
+  let Inst{15-6}  = code_2;
+  let Inst{5-0}   = 0x07;
+}
+
+class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x0;
+  let Inst{25-21} = 0x0;
+  let Inst{20-16} = 0x0;
+  let Inst{15-11} = op;
+  let Inst{10-6}  = 0x0;
+  let Inst{5-0}   = 0x0;
+}
+
+class EIDI_MMR6_ENC<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> {
+  bits<32> Inst;
+  bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt
+
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = 0x00;
+  let Inst{20-16} = rt;
+  let Inst{15-6}  = funct;
+  let Inst{5-0}   = 0x3c;
+}
+
+class SHIFT_MMR6_ENC<string instr_asm, bits<10> funct, bit rotate> : MMR6Arch<instr_asm> {
+  bits<5> rd;
+  bits<5> rt;
+  bits<5> shamt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rd;
+  let Inst{20-16} = rt;
+  let Inst{15-11} = shamt;
+  let Inst{10}    = rotate;
+  let Inst{9-0}   = funct;
+}
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index ed71c3d..53bde13 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -29,6 +29,7 @@ class AUI_MMR6_ENC : AUI_FM_MMR6;
 class BALC_MMR6_ENC  : BRANCH_OFF26_FM<0b101101>;
 class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>;
 class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
+class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
 class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>;
 class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011111>;
 class BGTZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b111000>;
@@ -40,6 +41,8 @@ class CLO_MMR6_ENC : POOL32A_2R_FM_MMR6<0b0100101100>;
 class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
 class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
 class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
+class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>;
+class EI_MMR6_ENC : EIDI_MMR6_ENC<"ei", 0x15d>;
 class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">;
 class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">;
 class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
@@ -60,6 +63,7 @@ class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>;
 class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>;
 class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>;
 class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>;
+class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>;
 class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>;
 class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>;
 class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
@@ -144,6 +148,8 @@ class BITSWAP_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
 
 class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>;
 
+class BRK_MMR6_DESC : BRK_FT<"break">;
+
 class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
                            RegisterOperand GPROpnd> : MMR6Arch<instr_asm> {
   dag OutOperandList = (outs);
@@ -166,6 +172,9 @@ class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
 class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd>;
 class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>;
 
+class EHB_MMR6_DESC : Barrier<"ehb">;
+class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>;
+
 class ERET_MMR6_DESC : ER_FT<"eret">;
 class ERETNC_MMR6_DESC : ER_FT<"eretnc">;
 
@@ -255,6 +264,7 @@ class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
 
 class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>;
 class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>;
+class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>;
 class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>;
 class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>;
 class MOD_MMR6_DESC : ArithLogicR<"mod", GPR32Opnd>;
@@ -302,11 +312,14 @@ def BLTZALC_MMR6 : R6MMR6Rel, BLTZALC_MMR6_ENC, BLTZALC_MMR6_DESC,
                    ISA_MICROMIPS32R6;
 def BNEZALC_MMR6 : R6MMR6Rel, BNEZALC_MMR6_ENC, BNEZALC_MMR6_DESC,
                    ISA_MICROMIPS32R6;
+def BREAK_MMR6 : StdMMR6Rel, BRK_MMR6_DESC, BRK_MMR6_ENC, ISA_MICROMIPS32R6;
 def CACHE_MMR6 : R6MMR6Rel, CACHE_MMR6_ENC, CACHE_MMR6_DESC, ISA_MICROMIPS32R6;
 def CLO_MMR6 : R6MMR6Rel, CLO_MMR6_ENC, CLO_MMR6_DESC, ISA_MICROMIPS32R6;
 def CLZ_MMR6 : R6MMR6Rel, CLZ_MMR6_ENC, CLZ_MMR6_DESC, ISA_MICROMIPS32R6;
 def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6;
 def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6;
+def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6;
+def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6;
 def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6;
 def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC,
                   ISA_MICROMIPS32R6;
@@ -330,8 +343,18 @@ def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC,
                   ISA_MICROMIPS32R6;
 def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC,
                   ISA_MICROMIPS32R6;
+def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6;
 def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6;
 def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6;
 def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
 def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
 }
+
+//===----------------------------------------------------------------------===//
+//
+// MicroMips instruction aliases
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 2aab739..3939384 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -934,3 +934,7 @@ class UncondBranchMMPseudo<string opstr> :
   def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>;
   def : MipsInstAlias<"nop", (MOVE16_MM ZERO, ZERO), 1>;
 }
+
+let Predicates = [InMicroMips] in {
+def : MipsInstAlias<"ei", (EI_MM ZERO), 1>, ISA_MIPS32R2;
+}
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 604b670..671d7a8 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -31,6 +31,6 @@ namespace llvm {
   FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
   FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
   FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 2c33cfb..f281c92 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -42,6 +42,6 @@ public:
                                             RegScavenger *RS) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index f2831fd..893fc7c 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -62,7 +62,7 @@ namespace {
   };
 
   char Mips16HardFloat::ID = 0;
-} // namespace
+}
 
 //
 // Return types that matter for hard float are:
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.cpp b/lib/Target/Mips/Mips16HardFloatInfo.cpp
index bf82108..2eb6e5d 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.cpp
+++ b/lib/Target/Mips/Mips16HardFloatInfo.cpp
@@ -46,5 +46,5 @@ extern FuncSignature const *findFuncSignature(const char *name) {
   }
   return nullptr;
 }
-} // namespace Mips16HardFloatInfo
-} // namespace llvm
+}
+}
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.h b/lib/Target/Mips/Mips16HardFloatInfo.h
index 8354c33..7295c28 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.h
+++ b/lib/Target/Mips/Mips16HardFloatInfo.h
@@ -44,7 +44,7 @@ struct FuncNameSignature {
 extern const FuncNameSignature PredefinedFuncs[];
 
 extern FuncSignature const *findFuncSignature(const char *name);
-} // namespace Mips16HardFloatInfo
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index ce6b3f8..ae0e61e 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -48,6 +48,6 @@ private:
 
 FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index c52ef2a..846e3c9 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -54,7 +54,7 @@ struct Mips16IntrinsicHelperType{
     return std::strcmp(Name, RHS.Name) == 0;
   }
 };
-} // namespace
+}
 
 // Libcalls for which no helper is generated. Sorted by name for binary search.
 static const Mips16Libcall HardFloatLibCalls[] = {
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index 99d3cac..d3b9f75 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -77,6 +77,6 @@ namespace llvm {
       unsigned SltiOpc, unsigned SltiXOpc,
       MachineInstr *MI,  MachineBasicBlock *BB )const;
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 1132d8a..6540b40 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -123,6 +123,6 @@ private:
 
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 83781ff..c37cf95 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -427,10 +427,10 @@ def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd>, MFC2OP_FM<0x12, 5>;
 
 /// Move between CPU and coprocessor registers
 let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
-def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
-def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3;
-def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3;
-def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3;
+def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd, COP0Opnd>, MFC3OP_FM<0x10, 1>, ISA_MIPS3;
+def DMTC0 : MTC3OP<"dmtc0", COP0Opnd, GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3;
+def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd, COP2Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3;
+def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3;
 }
 
 //===----------------------------------------------------------------------===//
@@ -613,10 +613,10 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs",
                     ISA_MIPS3;
 
 // Two operand (implicit 0 selector) versions:
-def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, COP0Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
+def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, COP2Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 COP2Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
 
 let Predicates = [HasMips64, HasCnMips] in {
 def : MipsInstAlias<"synciobdma", (SYNC 0x2), 0>;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index 6b5d02b..ae3c38c 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -58,6 +58,6 @@ namespace llvm {
     unsigned ADDiu, ORi, SLL, LUi;
     InstSeq Insts;
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1c80021..fdba064 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -747,8 +747,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
   // accept it. We therefore emit it when it contradicts the default or an
   // option has changed the default (i.e. FPXX) and omit it otherwise.
   if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX()))
-    getTargetStreamer().emitDirectiveModuleOddSPReg(STI.useOddSPReg(),
-                                                    ABI.IsO32());
+    getTargetStreamer().emitDirectiveModuleOddSPReg();
 }
 
 void MipsAsmPrinter::emitInlineAsmStart() const {
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 3c2b843..a7f3304 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -145,7 +145,7 @@ public:
   void EmitEndOfAsmFile(Module &M) override;
   void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
 };
-} // namespace llvm
+}
 
 #endif
 
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 04a9ef5..081c393 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -131,6 +131,6 @@ public:
   bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
   SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 3d020ab..c2651b8 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -16,6 +16,7 @@
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
@@ -143,7 +144,7 @@ private:
   unsigned materializeGV(const GlobalValue *GV, MVT VT);
   unsigned materializeInt(const Constant *C, MVT VT);
   unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
-  unsigned materializeExternalCallSym(const char *SynName);
+  unsigned materializeExternalCallSym(MCSymbol *Syn);
 
   MachineInstrBuilder emitInst(unsigned Opc) {
     return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
@@ -369,12 +370,12 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) {
   return DestReg;
 }
 
-unsigned MipsFastISel::materializeExternalCallSym(const char *SymName) {
+unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
   const TargetRegisterClass *RC = &Mips::GPR32RegClass;
   unsigned DestReg = createResultReg(RC);
   emitInst(Mips::LW, DestReg)
       .addReg(MFI->getGlobalBaseReg())
-      .addExternalSymbol(SymName, MipsII::MO_GOT);
+      .addSym(Sym, MipsII::MO_GOT);
   return DestReg;
 }
 
@@ -1234,7 +1235,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
   bool IsTailCall = CLI.IsTailCall;
   bool IsVarArg = CLI.IsVarArg;
   const Value *Callee = CLI.Callee;
-  const char *SymName = CLI.SymName;
+  MCSymbol *Symbol = CLI.Symbol;
 
   // Allow SelectionDAG isel to handle tail calls.
   if (IsTailCall)
@@ -1286,8 +1287,8 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   // Issue the call.
   unsigned DestAddress;
-  if (SymName)
-    DestAddress = materializeExternalCallSym(SymName);
+  if (Symbol)
+    DestAddress = materializeExternalCallSym(Symbol);
   else
     DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32);
   emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress);
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index dab9c05..5eabd58 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -49,6 +49,6 @@ protected:
 const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
 const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 83be74f..1426d0f 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -129,6 +129,6 @@ private:
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index e4f3cde..bc9a1ce 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -204,7 +204,7 @@ namespace llvm {
       SDL,
       SDR
     };
-  } // namespace MipsISD
+  }
 
   //===--------------------------------------------------------------------===//
   // TargetLowering Implementation
@@ -566,6 +566,6 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   }
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 3daff5f..08efc35 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -146,6 +146,6 @@ private:
 const MipsInstrInfo *createMips16InstrInfo(const MipsSubtarget &STI);
 const MipsInstrInfo *createMipsSEInstrInfo(const MipsSubtarget &STI);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 2a7949e..ab98c90 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1050,8 +1050,12 @@ class SCBase<string opstr, RegisterOperand RO> :
   let Constraints = "$rt = $dst";
 }
 
-class MFC3OP<string asmstr, RegisterOperand RO> :
-  InstSE<(outs RO:$rt, RO:$rd, uimm16:$sel), (ins),
+class MFC3OP<string asmstr, RegisterOperand RO, RegisterOperand RD> :
+  InstSE<(outs RO:$rt), (ins RD:$rd, uimm16:$sel),
+         !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>;
+
+class MTC3OP<string asmstr, RegisterOperand RO, RegisterOperand RD> :
+  InstSE<(outs RO:$rd), (ins RD:$rt, uimm16:$sel),
          !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>;
 
 class TrapBase<Instruction RealInst>
@@ -1278,7 +1282,9 @@ def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>,
 def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>,
            ISA_MIPS2_NOT_32R6_64R6;
 
-def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>;
+let AdditionalPredicates = [NotInMicroMips] in {
+def BREAK : MMRel, StdMMR6Rel, BRK_FT<"break">, BRK_FM<0xd>;
+}
 def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
 def TRAP : TrapBase<BREAK>;
 def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
@@ -1288,7 +1294,9 @@ def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
 }
 def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
 
-def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+let AdditionalPredicates = [NotInMicroMips] in {
+def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+}
 def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2;
 
 let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
@@ -1484,10 +1492,10 @@ def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
 def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
 
 /// Move Control Registers From/To CPU Registers
-def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32;
-def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32;
-def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>;
-def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>;
+def MFC0 : MFC3OP<"mfc0", GPR32Opnd, COP0Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32;
+def MTC0 : MTC3OP<"mtc0", COP0Opnd, GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32;
+def MFC2 : MFC3OP<"mfc2", GPR32Opnd, COP2Opnd>, MFC3OP_FM<0x12, 0>;
+def MTC2 : MTC3OP<"mtc2", COP2Opnd, GPR32Opnd>, MFC3OP_FM<0x12, 4>;
 
 class Barrier<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
                                       FrmOther, asmstr>;
@@ -1603,11 +1611,13 @@ def : MipsInstAlias<"or $rs, $rt, $imm",
                     (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
 def : MipsInstAlias<"or $rs, $imm",
                     (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
+let AdditionalPredicates = [NotInMicroMips] in {
 def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
-def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+}
+def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, COP0Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 COP0Opnd:$rd, GPR32Opnd:$rt, 0), 0>;
+def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, COP2Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 COP2Opnd:$rd, GPR32Opnd:$rt, 0), 0>;
 let AdditionalPredicates = [NotInMicroMips] in {
 def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
 }
@@ -1623,7 +1633,9 @@ def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
     
 def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
 def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
+let AdditionalPredicates = [NotInMicroMips] in {
 def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
+}
 def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
 
 def : MipsInstAlias<"teq $rs, $rt",
@@ -1707,6 +1719,12 @@ def BLEU : CondBranchPseudo<"bleu">;
 def BGEU : CondBranchPseudo<"bgeu">;
 def BGTU : CondBranchPseudo<"bgtu">;
 
+def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+                             "ulhu\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+
+def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+                            "ulw\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+
 //===----------------------------------------------------------------------===//
 //  Arbitrary patterns that map to one or more instructions
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 6b2a44d..80d9b75 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -88,6 +88,11 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
     Offset += MO.getOffset();
     break;
 
+  case MachineOperand::MO_MCSymbol:
+    Symbol = MO.getMCSymbol();
+    Offset += MO.getOffset();
+    break;
+
   case MachineOperand::MO_JumpTableIndex:
     Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
     break;
@@ -141,6 +146,7 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
   case MachineOperand::MO_MachineBasicBlock:
   case MachineOperand::MO_GlobalAddress:
   case MachineOperand::MO_ExternalSymbol:
+  case MachineOperand::MO_MCSymbol:
   case MachineOperand::MO_JumpTableIndex:
   case MachineOperand::MO_ConstantPoolIndex:
   case MachineOperand::MO_BlockAddress:
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index a8bd1cd..1ce27e4 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -45,6 +45,6 @@ private:
                             MCSymbolRefExpr::VariantKind Kind) const;
   bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index 8568137..b18a673 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ namespace {
   };
 
   char MipsModuleDAGToDAGISel::ID = 0;
-} // namespace
+}
 
 bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n");
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index 746feab..23f0b70 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -49,6 +49,7 @@ public:
     FGR64RegClass = &(TRI->getRegClass(Mips::FGR64RegClassID));
     AFGR64RegClass = &(TRI->getRegClass(Mips::AFGR64RegClassID));
     MSA128BRegClass = &(TRI->getRegClass(Mips::MSA128BRegClassID));
+    COP0RegClass = &(TRI->getRegClass(Mips::COP0RegClassID));
     COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
     COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
   }
@@ -66,6 +67,7 @@ private:
   const MCRegisterClass *FGR64RegClass;
   const MCRegisterClass *AFGR64RegClass;
   const MCRegisterClass *MSA128BRegClass;
+  const MCRegisterClass *COP0RegClass;
   const MCRegisterClass *COP2RegClass;
   const MCRegisterClass *COP3RegClass;
   uint32_t ri_gprmask;
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 5c71272..b6cd791 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -43,7 +43,7 @@ namespace {
   };
 
   char MipsOs16::ID = 0;
-} // namespace
+}
 
 // Figure out if we need float point based on the function signature.
 // We need to move variables in and/or out of floating point
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 7497a25..02bcac5 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -201,6 +201,10 @@ let Namespace = "Mips" in {
   foreach I = 0-7 in
   def FCC#I : MipsReg<#I, "fcc"#I>;
 
+  // COP0 registers.
+  foreach I = 0-31 in
+  def COP0#I : MipsReg<#I, ""#I>;
+
   // COP2 registers.
   foreach I = 0-31 in
   def COP2#I : MipsReg<#I, ""#I>;
@@ -431,6 +435,10 @@ def ACC64DSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
 
 def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
 
+// Coprocessor 0 registers.
+def COP0 : RegisterClass<"Mips", [i32], 32, (sequence "COP0%u", 0, 31)>,
+           Unallocatable;
+
 // Coprocessor 2 registers.
 def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
            Unallocatable;
@@ -559,6 +567,10 @@ def HWRegsAsmOperand : MipsAsmRegOperand {
   let Name = "HWRegsAsmReg";
 }
 
+def COP0AsmOperand : MipsAsmRegOperand {
+  let Name = "COP0AsmReg";
+}
+
 def COP2AsmOperand : MipsAsmRegOperand {
   let Name = "COP2AsmReg";
 }
@@ -609,6 +621,10 @@ def ACC64DSPOpnd : RegisterOperand<ACC64DSP> {
   let ParserMatchClass = ACC64DSPAsmOperand;
 }
 
+def COP0Opnd : RegisterOperand<COP0> {
+  let ParserMatchClass = COP0AsmOperand;
+}
+
 def COP2Opnd : RegisterOperand<COP2> {
   let ParserMatchClass = COP2AsmOperand;
 }
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index a858f30..ec7bf31 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -75,7 +75,7 @@ private:
   const MipsSEInstrInfo &TII;
   const MipsRegisterInfo &RegInfo;
 };
-} // namespace
+}
 
 ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
     : MF(MF_), MRI(MF.getRegInfo()),
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index ee56b8b..2fcd6bb 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -39,6 +39,6 @@ public:
   unsigned ehDataReg(unsigned I) const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index fb2f041..a894034 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -126,6 +126,6 @@ private:
 
 FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 623630a..d44f8d8 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -112,6 +112,6 @@ namespace llvm {
     MachineBasicBlock *emitFEXP2_D_1(MachineInstr *MI,
                                      MachineBasicBlock *BB) const;
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index cdafe9f..bebbabf 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -113,6 +113,6 @@ private:
                       MachineBasicBlock::iterator I) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index feddf98..061423f 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
   ~MipsSelectionDAGInfo();
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index c8a2e4b..5f92968 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -292,6 +292,6 @@ public:
     return &InstrItins;
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 976970c..38b2ecf 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -90,6 +90,6 @@ public:
                       CodeGenOpt::Level OL);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 39cadc1..6ce1be7 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -80,22 +80,15 @@ public:
   virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                                     const MCSymbol &Sym, bool IsReg);
 
-  /// Emit a '.module fp=value' directive using the given values.
-  /// Updates the .MIPS.abiflags section
-  virtual void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
-                                     bool Is32BitABI) {
-    ABIFlagsSection.setFpABI(Value, Is32BitABI);
-  }
-
-  /// Emit a '.module fp=value' directive using the current values of the
-  /// .MIPS.abiflags section.
-  void emitDirectiveModuleFP() {
-    emitDirectiveModuleFP(ABIFlagsSection.getFpABI(),
-                          ABIFlagsSection.Is32BitABI);
-  }
-
-  virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
+  // FP abiflags directives
+  virtual void emitDirectiveModuleFP();
+  virtual void emitDirectiveModuleOddSPReg();
+  virtual void emitDirectiveModuleSoftFloat();
+  virtual void emitDirectiveModuleHardFloat();
   virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value);
+  virtual void emitDirectiveSetOddSPReg();
+  virtual void emitDirectiveSetNoOddSPReg();
+
   void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
   void reallowModuleDirective() { ModuleDirectiveAllowed = true; }
   bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
@@ -198,11 +191,14 @@ public:
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                             const MCSymbol &Sym, bool IsReg) override;
 
-  // ABI Flags
-  void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
-                             bool Is32BitABI) override;
-  void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
+  // FP abiflags directives
+  void emitDirectiveModuleFP() override;
+  void emitDirectiveModuleOddSPReg() override;
+  void emitDirectiveModuleSoftFloat() override;
+  void emitDirectiveModuleHardFloat() override;
   void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
+  void emitDirectiveSetOddSPReg() override;
+  void emitDirectiveSetNoOddSPReg() override;
 };
 
 // This part is for ELF object output
@@ -244,9 +240,7 @@ public:
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                             const MCSymbol &Sym, bool IsReg) override;
 
-  // ABI Flags
-  void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
   void emitMipsAbiFlags();
 };
-} // namespace llvm
+}
 #endif
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 99e950e..05fe06d 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources
   NVPTXLowerAggrCopies.cpp
   NVPTXLowerKernelArgs.cpp
   NVPTXLowerAlloca.cpp
+  NVPTXPeephole.cpp
   NVPTXMCExpr.cpp
   NVPTXPrologEpilogPass.cpp
   NVPTXRegisterInfo.cpp
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 8144f3f..02c5a21 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -49,6 +49,6 @@ public:
                        raw_ostream &O, const char *Modifier = nullptr);
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index b55664e..a72ae2e 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -94,7 +94,7 @@ enum {
   IsSurfTexQueryFlag = 0x800,
   IsTexModeUnifiedFlag = 0x1000
 };
-} // namespace NVPTXII
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 8a28b089..221d2f0 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -54,7 +54,10 @@ createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
 static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
     StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
-  X->initMCCodeGenInfo(RM, CM, OL);
+
+  // The default relocation model is used regardless of what the client has
+  // specified, as it is the only relocation model currently supported.
+  X->initMCCodeGenInfo(Reloc::Default, CM, OL);
   return X;
 }
 
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index 1480b61..a2d670f 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -43,6 +43,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index d06d61f..fe28214 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -71,6 +71,7 @@ MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
 FunctionPass *createNVPTXImageOptimizerPass();
 FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
 BasicBlockPass *createNVPTXLowerAllocaPass();
+MachineFunctionPass *createNVPTXPeephole();
 
 bool isImageOrSamplerVal(const Value *, const Module *);
 
@@ -133,7 +134,7 @@ enum VecType {
   V2 = 2,
   V4 = 4
 };
-} // namespace PTXLdStInstCode
+}
 
 /// PTXCvtMode - Conversion code enumeration
 namespace PTXCvtMode {
@@ -152,7 +153,7 @@ enum CvtMode {
   FTZ_FLAG = 0x10,
   SAT_FLAG = 0x20
 };
-} // namespace PTXCvtMode
+}
 
 /// PTXCmpMode - Comparison mode enumeration
 namespace PTXCmpMode {
@@ -180,9 +181,9 @@ enum CmpMode {
   BASE_MASK = 0xFF,
   FTZ_FLAG = 0x100
 };
-} // namespace PTXCmpMode
-} // namespace NVPTX
-} // namespace llvm
+}
+}
+} // end namespace llvm;
 
 // Defines symbolic names for NVPTX registers.  This defines a mapping from
 // register name to register number.
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 1a1a8ca..cadd7a4 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -109,7 +109,7 @@ void VisitGlobalVariableForEmission(
   Visited.insert(GV);
   Visiting.erase(GV);
 }
-} // namespace
+}
 
 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
   if (!EmitLineNumbers)
@@ -826,7 +826,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
       .Initialize(OutContext, TM);
 
-  Mang = new Mangler(TM.getDataLayout());
+  Mang = new Mangler();
 
   // Emit header before any dwarf directives are emitted below.
   emitHeader(M, OS1, STI);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 12d80a3..f6f7685 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -349,6 +349,6 @@ public:
   DebugLoc prevDebugLoc;
   void emitLineNumberAsDotLoc(const MachineInstr &);
 };
-} // namespace llvm
+} // end of namespace
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 2d5e74c..7d4be8e 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -38,7 +38,7 @@ public:
   /// \brief Clean up the name to remove symbols invalid in PTX.
   std::string cleanUpName(StringRef Name);
 };
-} // namespace
+}
 
 char NVPTXAssignValidGlobalNames::ID = 0;
 
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 3eb7024..69a229e 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -107,7 +107,7 @@ private:
   /// Helper function for bitcasts.
   Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
 };
-} // namespace
+}
 
 char NVPTXFavorNonGenericAddrSpaces::ID = 0;
 
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 5503494..9b34aef 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -35,35 +35,33 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
   if (MF.getFrameInfo()->hasStackObjects()) {
     assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
-    // Insert "mov.u32 %SP, %Depot"
-    MachineBasicBlock::iterator MBBI = MBB.begin();
+    MachineInstr *MI = MBB.begin();
+    MachineRegisterInfo &MR = MF.getRegInfo();
+
     // This instruction really occurs before first instruction
     // in the BB, so giving it no debug location.
     DebugLoc dl = DebugLoc();
 
-    MachineRegisterInfo &MRI = MF.getRegInfo();
-
-    // mov %SPL, %depot;
-    // cvta.local %SP, %SPL;
-    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
-      unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
-      MachineInstr *MI =
-          BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
-                                     NVPTX::cvta_local_yes_64),
-                  NVPTX::VRFrame).addReg(LocalReg);
-      BuildMI(MBB, MI, dl,
-              MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
-              LocalReg).addImm(MF.getFunctionNumber());
-    } else {
-      unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
-      MachineInstr *MI =
-          BuildMI(MBB, MBBI, dl,
-                  MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
-                  NVPTX::VRFrame).addReg(LocalReg);
-      BuildMI(MBB, MI, dl,
-              MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
-              LocalReg).addImm(MF.getFunctionNumber());
+    // Emits
+    //   mov %SPL, %depot;
+    //   cvta.local %SP, %SPL;
+    // for local address accesses in MF.
+    bool Is64Bit =
+        static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
+    unsigned CvtaLocalOpcode =
+        (Is64Bit ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes);
+    unsigned MovDepotOpcode =
+        (Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
+    if (!MR.use_empty(NVPTX::VRFrame)) {
+      // If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
+      MI = BuildMI(MBB, MI, dl,
+                   MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
+                   NVPTX::VRFrame)
+               .addReg(NVPTX::VRFrameLocal);
     }
+    BuildMI(MBB, MI, dl, MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
+            NVPTX::VRFrameLocal)
+        .addImm(MF.getFunctionNumber());
   }
 }
 
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 488edec..14f8bb7 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -31,6 +31,6 @@ public:
                                 MachineBasicBlock::iterator I) const override;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 5879df3..fe20580 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -95,6 +95,6 @@ private:
   bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
 
 };
-} // namespace
+}
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b5af72a..09e0bd5 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -206,7 +206,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
   // Turn FP truncstore into trunc + store.
+  // FIXME: vector types should also be expanded
   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 276851f..ed94775 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -427,7 +427,7 @@ enum NodeType : unsigned {
   Suld3DV4I16Zero,
   Suld3DV4I32Zero
 };
-} // namespace NVPTXISD
+}
 
 class NVPTXSubtarget;
 
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index c86f861..aa36b6b 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -42,7 +42,7 @@ private:
   Value *cleanupValue(Value *V);
   void replaceWith(Instruction *From, ConstantInt *To);
 };
-} // namespace
+}
 
 char NVPTXImageOptimizer::ID = 0;
 
diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index 24dcb12..b533f31 100644
--- a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -132,6 +132,10 @@ void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
   assert(!Arg->hasByValAttr() &&
          "byval params should be handled by handleByValParam");
 
+  // Do nothing if the argument already points to the global address space.
+  if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+    return;
+
   Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
   Instruction *ArgInGlobal = new AddrSpaceCastInst(
       Arg, PointerType::get(Arg->getType()->getPointerElementType(),
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 4b9322c..10f1135 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -46,6 +46,6 @@ public:
     return ImageHandleList[Idx].c_str();
   }
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
new file mode 100644
index 0000000..a61c291
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -0,0 +1,154 @@
+//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimiztions -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// In NVPTX, NVPTXFrameLowering will emit following instruction at the beginning
+// of a MachineFunction.
+//
+//   mov %SPL, %depot
+//   cvta.local %SP, %SPL
+//
+// Because Frame Index is a generic address and alloca can only return generic
+// pointer, without this pass the instructions producing alloca'ed address will
+// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
+// this address with their .local versions, but this may introduce a lot of
+// cvta.to.local instructions. Performance can be improved if we avoid casting
+// address back and forth and directly calculate local address based on %SPL.
+// This peephole pass optimizes these cases, for example
+//
+// It will transform the following pattern
+//    %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
+//    %vreg1<def> = cvta_to_local_yes_64 %vreg0
+//
+// into
+//    %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
+//
+// %VRFrameLocal is the virtual register name of %SPL
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-peephole"
+
+namespace llvm {
+void initializeNVPTXPeepholePass(PassRegistry &);
+}
+
+namespace {
+struct NVPTXPeephole : public MachineFunctionPass {
+ public:
+  static char ID;
+  NVPTXPeephole() : MachineFunctionPass(ID) {
+    initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "NVPTX optimize redundant cvta.to.local instruction";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+}
+
+char NVPTXPeephole::ID = 0;
+
+INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
+
+static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
+  auto &MBB = *Root.getParent();
+  auto &MF = *MBB.getParent();
+  // Check current instruction is cvta.to.local
+  if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
+      Root.getOpcode() != NVPTX::cvta_to_local_yes)
+    return false;
+
+  auto &Op = Root.getOperand(1);
+  const auto &MRI = MF.getRegInfo();
+  MachineInstr *GenericAddrDef = nullptr;
+  if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
+    GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
+  }
+
+  // Check the register operand is uniquely defined by LEA_ADDRi instruction
+  if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
+      (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
+       GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
+    return false;
+  }
+
+  // Check the LEA_ADDRi operand is Frame index
+  auto &BaseAddrOp = GenericAddrDef->getOperand(1);
+  if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
+    return true;
+  }
+
+  return false;
+}
+
+static void CombineCVTAToLocal(MachineInstr &Root) {
+  auto &MBB = *Root.getParent();
+  auto &MF = *MBB.getParent();
+  const auto &MRI = MF.getRegInfo();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+
+  MachineInstrBuilder MIB =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
+              Root.getOperand(0).getReg())
+          .addReg(NVPTX::VRFrameLocal)
+          .addOperand(Prev.getOperand(2));
+
+  MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
+
+  // Check if MRI has only one non dbg use, which is Root
+  if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
+    Prev.eraseFromParentAndMarkDBGValuesForRemoval();
+  }
+  Root.eraseFromParentAndMarkDBGValuesForRemoval();
+}
+
+bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  // Loop over all of the basic blocks.
+  for (auto &MBB : MF) {
+    // Traverse the basic block.
+    auto BlockIter = MBB.begin();
+
+    while (BlockIter != MBB.end()) {
+      auto &MI = *BlockIter++;
+      if (isCVTAToLocalCombinationCandidate(MI)) {
+        CombineCVTAToLocal(MI);
+        Changed = true;
+      }
+    }  // Instruction
+  }    // Basic Block
+
+  // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
+  const auto &MRI = MF.getRegInfo();
+  if (MRI.use_empty(NVPTX::VRFrame)) {
+    if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
+      MI->eraseFromParentAndMarkDBGValuesForRemoval();
+    }
+  }
+
+  return Changed;
+}
+
+MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index ea58f77..5fd69a6 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -39,7 +39,7 @@ public:
 private:
   void calculateFrameObjectOffsets(MachineFunction &Fn);
 };
-} // namespace
+}
 
 MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
   return new NVPTXPrologEpilogPass();
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 3ef997b..6e97f9e 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -69,7 +69,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
   }
   return "";
 }
-} // namespace llvm
+}
 
 NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
 
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index efcee6b..ff6ccc4 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -65,5 +65,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
 def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
 
 // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot,
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRFrameLocal, VRDepot,
                                             (sequence "ENVREG%u", 0, 31))>;
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index bb0adc5..e83f735 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -45,7 +45,7 @@ private:
   bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
                           unsigned &Idx);
 };
-} // namespace
+}
 
 char NVPTXReplaceImageHandles::ID = 0;
 
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index d452045..c728771 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -103,6 +103,6 @@ public:
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index c071ee8..9d9072e 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -210,6 +210,10 @@ bool NVPTXPassConfig::addInstSelector() {
 
 void NVPTXPassConfig::addPostRegAlloc() {
   addPass(createNVPTXPrologEpilogPass(), false);
+  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
+  // index with VRFrame register. NVPTXPeephole need to be run after that and
+  // will replace VRFrame with VRFrameLocal when possible.
+  addPass(createNVPTXPeephole());
 }
 
 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 4d937c6..7e2ce73 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -91,6 +91,6 @@ void dumpInstRec(Value *v, std::set<Instruction *> *visited);
 void dumpInstRec(Value *v);
 void dumpParent(Value *v);
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 1c20430..5e375b7 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -75,7 +75,7 @@ private:
   bool handleFunction(Function *ReflectFunction);
   void setVarMap();
 };
-} // namespace
+}
 
 ModulePass *llvm::createNVVMReflectPass() {
   return new NVVMReflect();
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 36119d5..992be5b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
     bool needsRelocateWithSymbol(const MCSymbol &Sym,
                                  unsigned Type) const override;
   };
-} // namespace
+}
 
 PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
   : MCELFObjectTargetWriter(Is64Bit, OSABI,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index ad614f2..ae43e59d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -50,7 +50,7 @@ enum Fixups {
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
 };
-} // namespace PPC
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 489905b..5c38fe1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -219,7 +219,7 @@ public:
     llvm_unreachable("Unknown pseudo-op: .localentry");
   }
 };
-} // namespace
+}
 
 static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
                                                  formatted_raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 18818a1..77fe458 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -81,7 +81,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
   return false;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 // Generated files will use "namespace PPC". To avoid symbol clash,
 // undefine PPC here. PPC may be predefined on some hosts.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 9b5491f..9d72896 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -51,7 +51,7 @@ public:
                           FixedValue);
   }
 };
-} // namespace
+}
 
 /// computes the log2 of the size of the relocation,
 /// used for relocation_info::r_length.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index ff9b059..6075631 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -62,7 +62,7 @@ namespace PPC {
   /// Assume the condition register is set by MI(a,b), return the predicate if
   /// we modify the instructions such that condition register is set by MI(b,a).
   Predicate getSwappedPredicate(Predicate Opcode);
-} // namespace PPC
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 49f77b5..ae8d8b4 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -98,6 +98,6 @@ namespace llvm {
   };
   } // end namespace PPCII
   
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 2b6030a..940d55a 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -51,7 +51,7 @@ namespace {
     }
   };
   char PPCBSel::ID = 0;
-} // namespace
+}
 
 INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
                 false, false)
diff --git a/lib/Target/PowerPC/PPCCallingConv.h b/lib/Target/PowerPC/PPCCallingConv.h
index 550cac6..eb904a8 100644
--- a/lib/Target/PowerPC/PPCCallingConv.h
+++ b/lib/Target/PowerPC/PPCCallingConv.h
@@ -29,7 +29,7 @@ inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
   return false;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
 
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index 9cd9c2f..fc89753 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -191,7 +191,7 @@ public:
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
                 "PowerPC Early-Return Creation", false, false)
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 82ff530..fafcd76 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1448,9 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
   bool IsTailCall     = CLI.IsTailCall;
   bool IsVarArg       = CLI.IsVarArg;
   const Value *Callee = CLI.Callee;
-  const char *SymName = CLI.SymName;
+  const MCSymbol *Symbol = CLI.Symbol;
 
-  if (!Callee && !SymName)
+  if (!Callee && !Symbol)
     return false;
 
   // Allow SelectionDAG isel to handle tail calls.
@@ -2347,4 +2347,4 @@ namespace llvm {
       return new PPCFastISel(FuncInfo, LibInfo);
     return nullptr;
   }
-} // namespace llvm
+}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index b232863..28d074e 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -93,6 +93,6 @@ public:
   const SpillSlot *
   getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 5f9f9f2..c85c261 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -234,7 +234,7 @@ private:
 
     SDNode *transferMemOperands(SDNode *N, SDNode *Result);
   };
-} // namespace
+}
 
 /// InsertVRSaveCode - Once the entire function has been instruction selected,
 /// all virtual registers are created and all machine instructions are built,
@@ -2773,18 +2773,6 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         else
           DM[i] = 1;
 
-      // For little endian, we must swap the input operands and adjust
-      // the mask elements (reverse and invert them).
-      if (PPCSubTarget->isLittleEndian()) {
-        std::swap(Op1, Op2);
-        unsigned tmp = DM[0];
-        DM[0] = 1 - DM[1];
-        DM[1] = 1 - tmp;
-      }
-
-      SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
-                                              MVT::i32);
-
       if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
           Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
           isa<LoadSDNode>(Op1.getOperand(0))) {
@@ -2800,6 +2788,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         }
       }
 
+      // For little endian, we must swap the input operands and adjust
+      // the mask elements (reverse and invert them).
+      if (PPCSubTarget->isLittleEndian()) {
+        std::swap(Op1, Op2);
+        unsigned tmp = DM[0];
+        DM[0] = 1 - DM[1];
+        DM[1] = 1 - tmp;
+      }
+
+      SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
+                                              MVT::i32);
       SDValue Ops[] = { Op1, Op2, DMV };
       return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
     }
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 1cdfb41..594472b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1279,6 +1279,99 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
   }
 }
 
+/**
+ * \brief Common function used to match vmrgew and vmrgow shuffles
+ *
+ * The indexOffset determines whether to look for even or odd words in
+ * the shuffle mask. This is based on the of the endianness of the target
+ * machine.
+ *   - Little Endian:
+ *     - Use offset of 0 to check for odd elements
+ *     - Use offset of 4 to check for even elements
+ *   - Big Endian:
+ *     - Use offset of 0 to check for even elements
+ *     - Use offset of 4 to check for odd elements
+ * A detailed description of the vector element ordering for little endian and
+ * big endian can be found at
+ * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
+ * Targeting your applications - what little endian and big endian IBM XL C/C++
+ * compiler differences mean to you
+ *
+ * The mask to the shuffle vector instruction specifies the indices of the
+ * elements from the two input vectors to place in the result. The elements are
+ * numbered in array-access order, starting with the first vector. These vectors
+ * are always of type v16i8, thus each vector will contain 16 elements of size
+ * 8. More info on the shuffle vector can be found in the
+ * http://llvm.org/docs/LangRef.html#shufflevector-instruction
+ * Language Reference.
+ *
+ * The RHSStartValue indicates whether the same input vectors are used (unary)
+ * or two different input vectors are used, based on the following:
+ *   - If the instruction uses the same vector for both inputs, the range of the
+ *     indices will be 0 to 15. In this case, the RHSStart value passed should
+ *     be 0.
+ *   - If the instruction has two different vectors then the range of the
+ *     indices will be 0 to 31. In this case, the RHSStart value passed should
+ *     be 16 (indices 0-15 specify elements in the first vector while indices 16
+ *     to 31 specify elements in the second vector).
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] IndexOffset Specifies whether to look for even or odd elements
+ * \param[in] RHSStartValue Specifies the starting index for the righthand input
+ * vector to the shuffle_vector instruction
+ * \return true iff this shuffle vector represents an even or odd word merge
+ */
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
+                     unsigned RHSStartValue) {
+  if (N->getValueType(0) != MVT::v16i8)
+    return false;
+
+  for (unsigned i = 0; i < 2; ++i)
+    for (unsigned j = 0; j < 4; ++j)
+      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
+                             i*RHSStartValue+j+IndexOffset) ||
+          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
+                             i*RHSStartValue+j+IndexOffset+8))
+        return false;
+  return true;
+}
+
+/**
+ * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
+ * vmrgow instructions.
+ *
+ * \param[in] N The shuffle vector SD Node to analyze
+ * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
+ * \param[in] ShuffleKind Identify the type of merge:
+ *   - 0 = big-endian merge with two different inputs;
+ *   - 1 = either-endian merge with two identical inputs;
+ *   - 2 = little-endian merge with two different inputs (inputs are swapped for
+ *     little-endian merges).
+ * \param[in] DAG The current SelectionDAG
+ * \return true iff this shuffle mask 
+ */
+bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+                              unsigned ShuffleKind, SelectionDAG &DAG) {
+  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+    unsigned indexOffset = CheckEven ? 4 : 0;
+    if (ShuffleKind == 1) // Unary
+      return isVMerge(N, indexOffset, 0);
+    else if (ShuffleKind == 2) // swapped
+      return isVMerge(N, indexOffset, 16);
+    else
+      return false;
+  }
+  else {
+    unsigned indexOffset = CheckEven ? 0 : 4;
+    if (ShuffleKind == 1) // Unary
+      return isVMerge(N, indexOffset, 0);
+    else if (ShuffleKind == 0) // Normal
+      return isVMerge(N, indexOffset, 16);
+    else
+      return false;
+  }
+  return false;
+}
 
 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
 /// amount, otherwise return -1.
@@ -3765,7 +3858,7 @@ struct TailCallArgumentInfo {
   TailCallArgumentInfo() : FrameIdx(0) {}
 };
 
-} // namespace
+}
 
 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
 static void
@@ -7046,7 +7139,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
-        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
+        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
+        PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG)   ||
+        PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
       return Op;
     }
   }
@@ -7064,7 +7159,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
-      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
+      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+      PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG)             ||
+      PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
     return Op;
 
   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
@@ -9863,7 +9960,9 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
   case ISD::INTRINSIC_W_CHAIN: {
     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
     Chain = Intrin->getChain();
-    Base = Intrin->getBasePtr();
+    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
+    // us what we want. Get operand 2 instead.
+    Base = Intrin->getOperand(2);
     MMO = Intrin->getMemOperand();
     break;
   }
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index c33d605..02242b5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -353,7 +353,7 @@ namespace llvm {
       /// the last operand.
       TOC_ENTRY
     };
-  } // namespace PPCISD
+  }
 
   /// Define some predicates that are used for node matching.
   namespace PPC {
@@ -382,6 +382,11 @@ namespace llvm {
     bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG);
 
+    /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
+    /// a VMRGEW or VMRGOW instruction
+    bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
+                             unsigned ShuffleKind, SelectionDAG &DAG);
+  
     /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
     /// shift amount, otherwise return -1.
     int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
@@ -405,7 +410,7 @@ namespace llvm {
     /// If this is a qvaligni shuffle mask, return the shift
     /// amount, otherwise return -1.
     int isQVALIGNIShuffleMask(SDNode *N);
-  } // namespace PPC
+  }
 
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &Subtarget;
@@ -871,6 +876,6 @@ namespace llvm {
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State);
-} // namespace llvm
+}
 
 #endif   // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 9ff604b..cb0271f 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -155,6 +155,33 @@ def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
 }]>;
 
 
+def vmrgew_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 0, *CurDAG);
+}]>;
+def vmrgow_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                             (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 0, *CurDAG);
+}]>;
+def vmrgew_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 1, *CurDAG);
+}]>;
+def vmrgow_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                   (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 1, *CurDAG);
+}]>;
+def vmrgew_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), true, 2, *CurDAG);
+}]>;
+def vmrgow_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+                                     (vector_shuffle node:$lhs, node:$rhs), [{
+  return PPC::isVMRGEOShuffleMask(cast<ShuffleVectorSDNode>(N), false, 2, *CurDAG);
+}]>;
+
+
+
 def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
   return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG), SDLoc(N));
 }]>;
@@ -1008,6 +1035,29 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
 def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
 } // isCommutable
 
+// Vector merge 
+def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                      "vmrgew $vD, $vA, $vB", IIC_VecFP,
+                      [(set v16i8:$vD, (vmrgew_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                      "vmrgow $vD, $vA, $vB", IIC_VecFP,
+                      [(set v16i8:$vD, (vmrgow_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+// Match vmrgew(x,x) and vmrgow(x,x)
+def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
+        (VMRGEW $vA, $vA)>;
+def:Pat<(vmrgow_unary_shuffle v16i8:$vA, undef),
+        (VMRGOW $vA, $vA)>;
+
+// Match vmrgew(y,x) and vmrgow(y,x), i.e., swapped operands.  These fragments
+// are matched for little-endian, where the inputs must be swapped for correct
+// semantics.w
+def:Pat<(vmrgew_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGEW $vB, $vA)>;
+def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
+        (VMRGOW $vB, $vA)>;
+
+
 // Vector shifts
 def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
 def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index ec94fa5..cf71b1c 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -38,6 +38,6 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
     return MIB.addFrameIndex(FI).addImm(Offset);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d3bb7a6..696a838 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -352,15 +352,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
   bool isPPC64 = Subtarget.isPPC64();
 
   // If the block has no terminators, it just falls into the block after it.
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
     return false;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return false;
-    --I;
-  }
+
   if (!isUnpredicatedTerminator(I))
     return false;
 
@@ -513,14 +508,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 }
 
 unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return 0;
-    --I;
-  }
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return 0;
+
   if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
       I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
       I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 39bf454..e2d6346 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -237,6 +237,6 @@ public:
   void getNoopForMachoTarget(MCInst &NopInst) const override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index d08b808..43ba499 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -457,22 +457,34 @@ let Uses = [RM] in {
 
   defm XVCMPEQDP : XX3Form_Rcr<60, 99,
                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                             "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+                             "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                             [(set v2i64:$XT,
+                                (int_ppc_vsx_xvcmpeqdp v2f64:$XA, v2f64:$XB))]>;
   defm XVCMPEQSP : XX3Form_Rcr<60, 67,
                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                             "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+                             "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                             [(set v4i32:$XT,
+                                (int_ppc_vsx_xvcmpeqsp v4f32:$XA, v4f32:$XB))]>;
   defm XVCMPGEDP : XX3Form_Rcr<60, 115,
                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                             "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+                             "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                             [(set v2i64:$XT,
+                                (int_ppc_vsx_xvcmpgedp v2f64:$XA, v2f64:$XB))]>;
   defm XVCMPGESP : XX3Form_Rcr<60, 83,
                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                             "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+                             "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                             [(set v4i32:$XT,
+                                (int_ppc_vsx_xvcmpgesp v4f32:$XA, v4f32:$XB))]>;
   defm XVCMPGTDP : XX3Form_Rcr<60, 107,
                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                             "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+                             "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                             [(set v2i64:$XT,
+                                (int_ppc_vsx_xvcmpgtdp v2f64:$XA, v2f64:$XB))]>;
   defm XVCMPGTSP : XX3Form_Rcr<60, 75,
                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                             "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+                             "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                             [(set v4i32:$XT,
+                                (int_ppc_vsx_xvcmpgtsp v4f32:$XA, v4f32:$XB))]>;
 
   // Move Instructions
   def XSABSDP : XX2Form<60, 345,
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index e783b5e..b4e1c09 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -88,7 +88,7 @@ namespace {
     const TargetTransformInfo *TTI;
     const DataLayout *DL;
   };
-} // namespace
+}
 
 char PPCLoopDataPrefetch::ID = 0;
 INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 1891b63..b6e7799 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -87,7 +87,7 @@ namespace {
     LoopInfo *LI;
     ScalarEvolution *SE;
   };
-} // namespace
+}
 
 char PPCLoopPreIncPrep::ID = 0;
 static const char *name = "Prepare loop for pre-inc. addressing modes";
@@ -113,7 +113,7 @@ namespace {
   protected:
     ScalarEvolution *SE;
   };
-} // namespace
+}
 
 static bool IsPtrInBounds(Value *BasePtr) {
   Value *StrippedBasePtr = BasePtr;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index c44d5d7..76837ec 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -57,7 +57,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
 
   if (!MO.isGlobal()) {
     assert(MO.isSymbol() && "Isn't a symbol reference");
-    Mang->getNameWithPrefix(Name, MO.getSymbolName());
+    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
   } else {
     const GlobalValue *GV = MO.getGlobal();
     TM.getNameWithPrefix(Name, GV, *Mang);
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index d2eaeb4..2c1378d 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
   ~PPCSelectionDAGInfo();
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index ea17e1c..e9cc3d4 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -58,7 +58,7 @@ namespace PPC {
     DIR_PWR8,
     DIR_64
   };
-} // namespace PPC
+}
 
 class GlobalValue;
 class TargetMachine;
@@ -286,6 +286,6 @@ public:
 
   bool enableSubRegLiveness() const override;
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 7a9db0f..2dc0d82 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -156,7 +156,7 @@ public:
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
                       "PowerPC TLS Dynamic Call Fixup", false, false)
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 61b963f..bf165c9 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -145,7 +145,7 @@ public:
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 INITIALIZE_PASS(PPCTOCRegDeps, DEBUG_TYPE,
                 "PowerPC TOC Register Dependencies", false, false)
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index a5c4c23..dbe7617 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -22,6 +22,6 @@ public:
   virtual void emitAbiVersion(int AbiVersion) = 0;
   virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 537db65..5e3ae2a 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -165,7 +165,7 @@ public:
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
                 "PowerPC VSX Copy Legalization", false, false)
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index a029ddf..f352fa6 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -317,7 +317,7 @@ public:
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
                       "PowerPC VSX FMA Mutation", false, false)
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 939293a..e7ab71a 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -79,7 +79,6 @@ struct PPCVSXSwapEntry {
   unsigned int IsStore : 1;
   unsigned int IsSwap : 1;
   unsigned int MentionsPhysVR : 1;
-  unsigned int HasImplicitSubreg : 1;
   unsigned int IsSwappable : 1;
   unsigned int SpecialHandling : 3;
   unsigned int WebRejected : 1;
@@ -224,7 +223,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
     for (MachineInstr &MI : MBB) {
 
       bool RelevantInstr = false;
-      bool ImplicitSubreg = false;
 
       for (const MachineOperand &MO : MI.operands()) {
         if (!MO.isReg())
@@ -232,8 +230,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         unsigned Reg = MO.getReg();
         if (isVecReg(Reg)) {
           RelevantInstr = true;
-          if (MO.getSubReg() != 0)
-            ImplicitSubreg = true;
           break;
         }
       }
@@ -249,9 +245,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
       PPCVSXSwapEntry SwapEntry{};
       int VecIdx = addSwapEntry(&MI, SwapEntry);
 
-      if (ImplicitSubreg)
-        SwapVector[VecIdx].HasImplicitSubreg = 1;
-
       switch(MI.getOpcode()) {
       default:
         // Unless noted otherwise, an instruction is considered
@@ -260,7 +253,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         // select, compare, etc.).
         SwapVector[VecIdx].IsSwappable = 1;
         break;
-      case PPC::XXPERMDI:
+      case PPC::XXPERMDI: {
         // This is a swap if it is of the form XXPERMDI t, s, s, 2.
         // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we
         // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2,
@@ -268,9 +261,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         // SUBREG_TO_REG to find the real source value for comparison.
         // If the real source value is a physical register, then mark the
         // XXPERMDI as mentioning a physical register.
-        // Any other form of XXPERMDI is lane-sensitive and unsafe
-        // for the optimization.
-        if (MI.getOperand(3).getImm() == 2) {
+        int immed = MI.getOperand(3).getImm();
+        if (immed == 2) {
           unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
                                                VecIdx);
           unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
@@ -278,7 +270,26 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
           if (trueReg1 == trueReg2)
             SwapVector[VecIdx].IsSwap = 1;
         }
+        // This is a doubleword splat if it is of the form
+        // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3.  As above we
+        // must look through chains of copy-likes to find the source
+        // register.  We turn off the marking for mention of a physical
+        // register, because splatting it is safe; the optimization
+        // will not swap the value in the physical register.
+        else if (immed == 0 || immed == 3) {
+          unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(),
+                                               VecIdx);
+          unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(),
+                                               VecIdx);
+          if (trueReg1 == trueReg2) {
+            SwapVector[VecIdx].IsSwappable = 1;
+            SwapVector[VecIdx].MentionsPhysVR = 0;
+          }
+        }
+        // Any other form of XXPERMDI is lane-sensitive and unsafe
+        // for the optimization.
         break;
+      }
       case PPC::LVX:
         // Non-permuting loads are currently unsafe.  We can use special
         // handling for this in the future.  By not marking these as
@@ -307,14 +318,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         SwapVector[VecIdx].IsStore = 1;
         SwapVector[VecIdx].IsSwap = 1;
         break;
-      case PPC::SUBREG_TO_REG:
-        // These are fine provided they are moving between full vector
-        // register classes.  For example, the VRs are a subset of the
-        // VSRs, but each VR and each VSR is a full 128-bit register.
-        if (isVecReg(MI.getOperand(0).getReg()) &&
-            isVecReg(MI.getOperand(2).getReg()))
-          SwapVector[VecIdx].IsSwappable = 1;
-        break;
       case PPC::COPY:
         // These are fine provided they are moving between full vector
         // register classes.
@@ -349,7 +352,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
       case PPC::LVSL:
       case PPC::LVSR:
       case PPC::LVXL:
-      case PPC::LXVDSX:
       case PPC::STVEBX:
       case PPC::STVEHX:
       case PPC::STVEWX:
@@ -457,23 +459,19 @@ int PPCVSXSwapRemoval::addSwapEntry(MachineInstr *MI,
 // such operations to the ultimate source register.  If a
 // physical register is encountered, we stop the search and
 // flag the swap entry indicated by VecIdx (the original
-// XXPERMDI) as mentioning a physical register.  Similarly
-// for implicit subregister mentions (which should never
-// happen).
+// XXPERMDI) as mentioning a physical register.
 unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
                                              unsigned VecIdx) {
   MachineInstr *MI = MRI->getVRegDef(SrcReg);
   if (!MI->isCopyLike())
     return SrcReg;
 
-  unsigned CopySrcReg, CopySrcSubreg;
-  if (MI->isCopy()) {
+  unsigned CopySrcReg;
+  if (MI->isCopy())
     CopySrcReg = MI->getOperand(1).getReg();
-    CopySrcSubreg = MI->getOperand(1).getSubReg();
-  } else {
+  else {
     assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
     CopySrcReg = MI->getOperand(2).getReg();
-    CopySrcSubreg = MI->getOperand(2).getSubReg();
   }
 
   if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
@@ -481,11 +479,6 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
     return CopySrcReg;
   }
 
-  if (CopySrcSubreg != 0) {
-    SwapVector[VecIdx].HasImplicitSubreg = 1;
-    return CopySrcReg;
-  }
-
   return lookThruCopyLike(CopySrcReg, VecIdx);
 }
 
@@ -552,11 +545,9 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
   for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
     int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
 
-    // Reject webs containing mentions of physical registers or implicit
-    // subregs, or containing operations that we don't know how to handle
-    // in a lane-permuted region.
+    // Reject webs containing mentions of physical registers, or containing
+    // operations that we don't know how to handle in a lane-permuted region.
     if (SwapVector[EntryIdx].MentionsPhysVR ||
-        SwapVector[EntryIdx].HasImplicitSubreg ||
         !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) {
 
       SwapVector[Repr].WebRejected = 1;
@@ -765,8 +756,6 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
       DEBUG(dbgs() << "swap ");
     if (SwapVector[EntryIdx].MentionsPhysVR)
       DEBUG(dbgs() << "physreg ");
-    if (SwapVector[EntryIdx].HasImplicitSubreg)
-      DEBUG(dbgs() << "implsubreg ");
 
     if (SwapVector[EntryIdx].IsSwappable) {
       DEBUG(dbgs() << "swappable ");
@@ -809,7 +798,7 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
   DEBUG(dbgs() << "\n");
 }
 
-} // namespace
+} // end default namespace
 
 INITIALIZE_PASS_BEGIN(PPCVSXSwapRemoval, DEBUG_TYPE,
                       "PowerPC VSX Swap Removal", false, false)
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 59f011a..3e56b9e 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -41,7 +41,7 @@ public:
                               raw_ostream &VStream,
                               raw_ostream &CStream) const override;
 };
-} // namespace
+}
 
 namespace llvm {
 extern Target TheSparcTarget, TheSparcV9Target, TheSparcelTarget;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 800a5f2..0be60fd 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -36,7 +36,7 @@ namespace {
                                  unsigned Type) const override;
 
   };
-} // namespace
+}
 
 unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target,
                                             const MCFixup &Fixup,
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index 34c58da..8d79396 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -91,7 +91,7 @@ namespace llvm {
       LastTargetFixupKind,
       NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
     };
-  } // namespace Sparc
-} // namespace llvm
+  }
+}
 
 #endif
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 8f62de4..a9c9f15 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -41,7 +41,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                     const Triple &TT, StringRef CPU);
 MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
                                            bool IsLIttleEndian, uint8_t OSABI);
-} // namespace llvm
+} // End llvm namespace
 
 // Defines symbolic names for Sparc registers.  This defines a mapping from
 // register name to register number.
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 133af86..96378d5 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -33,7 +33,7 @@ namespace llvm {
   void LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
                                       MCInst &OutMI,
                                       AsmPrinter &AP);
-} // namespace llvm
+} // end namespace llvm;
 
 namespace llvm {
   // Enums corresponding to Sparc condition codes, both icc's and fcc's.  These
@@ -74,7 +74,7 @@ namespace llvm {
       FCC_ULE = 14+16,  // Unordered or Less or Equal
       FCC_O   = 15+16   // Ordered
     };
-  } // namespace SPCC
+  }
 
   inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
     switch (CC) {
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 3d73bbd..bb3b788 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -55,6 +55,6 @@ private:
 
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index a4b9c79..b6bc3d2 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -49,7 +49,7 @@ namespace llvm {
       TLS_LD,
       TLS_CALL
     };
-  } // namespace SPISD
+  }
 
   class SparcTargetLowering : public TargetLowering {
     const SparcSubtarget *Subtarget;
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index b59dd89..15673f1 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -96,6 +96,6 @@ public:
   unsigned getGlobalBaseReg(MachineFunction *MF) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index b1f795b..a02bae0 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -353,13 +353,6 @@ let hasSideEffects = 1, mayStore = 1 in {
                    [(flushw)]>;
 }
 
-let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in
-  def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>;
-
-let rd = 0 in
-  def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22),
-                  "unimp $imm22", []>;
-
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.  This has to handle all
 // permutations of selection between i32/f32/f64 on ICC and FCC.
@@ -406,36 +399,6 @@ let usesCustomInserter = 1, Uses = [FCC0] in {
             [(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>;
 }
 
-// JMPL Instruction.
-let isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
-    DecoderMethod = "DecodeJMPL" in {
-  def JMPLrr: F3_1<2, 0b111000, (outs IntRegs:$dst), (ins MEMrr:$addr),
-                  "jmpl $addr, $dst", []>;
-  def JMPLri: F3_2<2, 0b111000, (outs IntRegs:$dst), (ins MEMri:$addr),
-                  "jmpl $addr, $dst", []>;
-}
-
-// Section A.3 - Synthetic Instructions, p. 85
-// special cases of JMPL:
-let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
-    isCodeGenOnly = 1 in {
-  let rd = 0, rs1 = 15 in
-    def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
-                   "jmp %o7+$val", [(retflag simm13:$val)]>;
-
-  let rd = 0, rs1 = 31 in
-    def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
-                  "jmp %i7+$val", []>;
-}
-
-let isReturn = 1, isTerminator = 1, hasDelaySlot = 1,
-     isBarrier = 1, rd = 0, DecoderMethod = "DecodeReturn" in {
-  def RETTrr : F3_1<2, 0b111001, (outs), (ins MEMrr:$addr),
-                       "rett $addr", []>;
-  def RETTri : F3_2<2, 0b111001, (outs), (ins MEMri:$addr),
-                       "rett $addr", []>;
-}
-
 // Section B.1 - Load Integer Instructions, p. 90
 let DecoderMethod = "DecodeLoadInt" in {
   defm LDSB : LoadA<"ldsb", 0b001001, 0b011001, sextloadi8,  IntRegs, i32>;
@@ -470,6 +433,24 @@ let DecoderMethod = "DecodeStoreQFP" in
   defm STQF  : Store<"stq", 0b100110, store,         QFPRegs, f128>,
                Requires<[HasV9, HasHardQuad]>;
 
+// Section B.8 - SWAP Register with Memory Instruction
+// (Atomic swap)
+let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in {
+  def SWAPrr : F3_1<3, 0b001111,
+                 (outs IntRegs:$dst), (ins MEMrr:$addr, IntRegs:$val),
+                 "swap [$addr], $dst",
+                 [(set i32:$dst, (atomic_swap_32 ADDRrr:$addr, i32:$val))]>;
+  def SWAPri : F3_2<3, 0b001111,
+                 (outs IntRegs:$dst), (ins MEMri:$addr, IntRegs:$val),
+                 "swap [$addr], $dst",
+                 [(set i32:$dst, (atomic_swap_32 ADDRri:$addr, i32:$val))]>;
+  def SWAPArr : F3_1_asi<3, 0b011111,
+                 (outs IntRegs:$dst), (ins MEMrr:$addr, i8imm:$asi, IntRegs:$val),
+                 "swapa [$addr] $asi, $dst",
+                 [/*FIXME: pattern?*/]>;
+}
+
+
 // Section B.9 - SETHI Instruction, p. 104
 def SETHIi: F2_1<0b100,
                  (outs IntRegs:$rd), (ins i32imm:$imm22),
@@ -725,6 +706,56 @@ let Uses = [O6],
   }
 }
 
+// Section B.25 - Jump and Link Instruction
+
+// JMPL Instruction.
+let isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
+    DecoderMethod = "DecodeJMPL" in {
+  def JMPLrr: F3_1<2, 0b111000, (outs IntRegs:$dst), (ins MEMrr:$addr),
+                  "jmpl $addr, $dst", []>;
+  def JMPLri: F3_2<2, 0b111000, (outs IntRegs:$dst), (ins MEMri:$addr),
+                  "jmpl $addr, $dst", []>;
+}
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
+    isCodeGenOnly = 1 in {
+  let rd = 0, rs1 = 15 in
+    def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+                   "jmp %o7+$val", [(retflag simm13:$val)]>;
+
+  let rd = 0, rs1 = 31 in
+    def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+                  "jmp %i7+$val", []>;
+}
+
+// Section B.26 - Return from Trap Instruction
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1,
+     isBarrier = 1, rd = 0, DecoderMethod = "DecodeReturn" in {
+  def RETTrr : F3_1<2, 0b111001, (outs), (ins MEMrr:$addr),
+                       "rett $addr", []>;
+  def RETTri : F3_2<2, 0b111001, (outs), (ins MEMri:$addr),
+                       "rett $addr", []>;
+}
+
+
+// Section B.27 - Trap on Integer Condition Codes Instruction
+multiclass TRAP<string regStr> {
+  def rr : TRAPSPrr<0b111010, (outs), (ins IntRegs:$rs1, IntRegs:$rs2,
+                                       CCOp:$cond),
+              !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $rs2"), []>;
+  def ri : TRAPSPri<0b111010, (outs), (ins IntRegs:$rs1, i32imm:$imm,
+                                      CCOp:$cond),
+              !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $imm"), []>;
+}
+
+let hasSideEffects = 1, Uses = [ICC], cc = 0b00 in
+  defm TICC : TRAP<"%icc">;
+
+let isBarrier = 1, isTerminator = 1, rd = 0b01000, rs1 = 0, simm13 = 5 in
+  def TA5 : F3_2<0b10, 0b111010, (outs), (ins), "ta 5", [(trap)]>;
+
 // Section B.28 - Read State Register Instructions
 let rs2 = 0 in
   def RDASR : F3_1<2, 0b101000,
@@ -787,6 +818,18 @@ let Predicates = [HasNoV9] in {
   }
 }
 
+// Section B.30 - STBAR Instruction
+let hasSideEffects = 1, rd = 0, rs1 = 0b01111, rs2 = 0 in
+  def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>;
+
+
+// Section B.31 - Unimplmented Instruction
+let rd = 0 in
+  def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22),
+                  "unimp $imm22", []>;
+
+// Section B.33 - Floating-point Operate (FPop) Instructions
+
 // Convert Integer to Floating-point Instructions, p. 141
 def FITOS : F3_3u<2, 0b110100, 0b011000100,
                  (outs FPRegs:$rd), (ins FPRegs:$rs2),
@@ -1168,29 +1211,10 @@ let rs1 = 0 in
 def : Pat<(ctpop i32:$src),
           (POPCrr (SRLri $src, 0))>;
 
-// Atomic swap.
-let hasSideEffects =1, rd = 0, rs1 = 0b01111, rs2 = 0 in
-  def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>;
-
 let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in
  def MEMBARi : F3_2<2, 0b101000, (outs), (ins simm13Op:$simm13),
                     "membar $simm13", []>;
 
-let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in {
-  def SWAPrr : F3_1<3, 0b001111,
-                 (outs IntRegs:$dst), (ins MEMrr:$addr, IntRegs:$val),
-                 "swap [$addr], $dst",
-                 [(set i32:$dst, (atomic_swap_32 ADDRrr:$addr, i32:$val))]>;
-  def SWAPri : F3_2<3, 0b001111,
-                 (outs IntRegs:$dst), (ins MEMri:$addr, IntRegs:$val),
-                 "swap [$addr], $dst",
-                 [(set i32:$dst, (atomic_swap_32 ADDRri:$addr, i32:$val))]>;
-  def SWAPArr : F3_1_asi<3, 0b011111,
-                 (outs IntRegs:$dst), (ins MEMrr:$addr, i8imm:$asi, IntRegs:$val),
-                 "swapa [$addr] $asi, $dst",
-                 [/*FIXME: pattern?*/]>;
-}
-
 // TODO: Should add a CASArr variant. In fact, the CAS instruction,
 // unlike other instructions, only comes in a form which requires an
 // ASI be provided. The ASI value hardcoded here is ASI_PRIMARY, the
@@ -1215,18 +1239,6 @@ let hasSideEffects = 1 in {
 }
 }
 
-multiclass TRAP<string regStr> {
-  def rr : TRAPSPrr<0b111010, (outs), (ins IntRegs:$rs1, IntRegs:$rs2,
-                                       CCOp:$cond),
-              !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $rs2"), []>;
-  def ri : TRAPSPri<0b111010, (outs), (ins IntRegs:$rs1, i32imm:$imm,
-                                      CCOp:$cond),
-              !strconcat(!strconcat("t$cond ", regStr), ", $rs1 + $imm"), []>;
-}
-
-let hasSideEffects = 1, Uses = [ICC], cc = 0b00 in
-  defm TICC : TRAP<"%icc">;
-
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 0471443..1047442 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -51,6 +51,6 @@ namespace llvm {
     void setLeafProc(bool rhs) { IsLeafProc = rhs; }
     bool isLeafProc() const { return IsLeafProc; }
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index 2ceae82..6818291 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -26,6 +26,6 @@ public:
   ~SparcSelectionDAGInfo() override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 0eb3d65..75fd37f 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -96,7 +96,10 @@ struct SystemZAddressingMode {
 
 // Return a mask with Count low bits set.
 static uint64_t allOnes(unsigned int Count) {
-  return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
+  assert(Count <= 64);
+  if (Count > 63)
+    return UINT64_MAX;
+  return (uint64_t(1) << Count) - 1;
 }
 
 // Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
@@ -903,6 +906,8 @@ SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
 SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
+  if (!VT.isInteger() || VT.getSizeInBits() > 64)
+    return nullptr;
   RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
   unsigned Count = 0;
   while (expandRxSBG(RISBG))
@@ -958,6 +963,10 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
 }
 
 SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  if (!VT.isInteger() || VT.getSizeInBits() > 64)
+    return nullptr;
   // Try treating each operand of N as the second operand of the RxSBG
   // and see which goes deepest.
   RxSBGOperands RxSBG[] = {
@@ -993,8 +1002,6 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
       Opcode = SystemZ::RISBGN;
   }
 
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
   SDValue Ops[5] = {
     convertTo(DL, MVT::i64, Op0),
     convertTo(DL, MVT::i64, RxSBG[I].Input),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 7584579..372f6fb 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2005,17 +2005,17 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
   else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
     // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
     // always true for CC>3.
-    C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
+    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
   else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
     // ...and the inverse of that.
-    C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
+    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
   else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
     // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
     // always true for CC>3.
-    C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
+    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
   else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
     // ...and the inverse of that.
-    C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
+    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
   else
     llvm_unreachable("Unexpected integer comparison type");
   C.CCMask &= CCValid;
@@ -3292,7 +3292,7 @@ struct Permute {
   unsigned Operand;
   unsigned char Bytes[SystemZ::VectorBytes];
 };
-} // namespace
+}
 
 static const Permute PermuteForms[] = {
   // VMRHG
@@ -3574,7 +3574,7 @@ struct GeneralShuffle {
   // The type of the shuffle result.
   EVT VT;
 };
-} // namespace
+}
 
 // Add an extra undefined element to the shuffle.
 void GeneralShuffle::addUndef() {
diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt
new file mode 100644
index 0000000..df04c2a
--- /dev/null
+++ b/lib/Target/WebAssembly/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_TARGET_DEFINITIONS WebAssembly.td)
+
+tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter)
+tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(WebAssemblyCommonTableGen)
+
+add_llvm_target(WebAssemblyCodeGen
+  WebAssemblyFrameLowering.cpp
+  WebAssemblyInstrInfo.cpp
+  WebAssemblyISelDAGToDAG.cpp
+  WebAssemblyISelLowering.cpp
+  WebAssemblyMachineFunctionInfo.cpp
+  WebAssemblyRegisterInfo.cpp
+  WebAssemblySelectionDAGInfo.cpp
+  WebAssemblySubtarget.cpp
+  WebAssemblyTargetMachine.cpp
+  WebAssemblyTargetTransformInfo.cpp
+)
+
+add_dependencies(LLVMWebAssemblyCodeGen intrinsics_gen)
+
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt b/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..5394b67
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMWebAssemblyAsmPrinter
+  WebAssemblyInstPrinter.cpp
+  )
diff --git a/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt b/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt
new file mode 100644
index 0000000..54df6d6
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/WebAssembly/InstPrinter/LLVMBuild.txt -------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyAsmPrinter
+parent = WebAssembly
+required_libraries = MC Support
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/InstPrinter/Makefile b/lib/Target/WebAssembly/InstPrinter/Makefile
new file mode 100644
index 0000000..8753437
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/WebAssembly/AsmPrinter/Makefile ----------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyAsmPrinter
+
+# Hack: we need to include 'main' wasm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
new file mode 100644
index 0000000..fbb985a
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -0,0 +1,43 @@
+//=- WebAssemblyInstPrinter.cpp - WebAssembly assembly instruction printing -=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Print MCInst instructions to wasm format.
+///
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "WebAssembly.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <cctype>
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
+                                               const MCInstrInfo &MII,
+                                               const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
+                                          unsigned RegNo) const {
+  llvm_unreachable("TODO: implement printRegName");
+}
+
+void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+                                       StringRef Annot,
+                                       const MCSubtargetInfo &STI) {
+  llvm_unreachable("TODO: implement printInst");
+}
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
new file mode 100644
index 0000000..70fcef2
--- /dev/null
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -0,0 +1,38 @@
+// WebAssemblyInstPrinter.h - Print wasm MCInst to assembly syntax -*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This class prints an WebAssembly MCInst to wasm file syntax.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class MCOperand;
+class MCSubtargetInfo;
+
+class WebAssemblyInstPrinter : public MCInstPrinter {
+public:
+  WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                         const MCRegisterInfo &MRI);
+
+  void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/LLVMBuild.txt b/lib/Target/WebAssembly/LLVMBuild.txt
new file mode 100644
index 0000000..04ef9c4
--- /dev/null
+++ b/lib/Target/WebAssembly/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/WebAssembly/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = WebAssembly
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = WebAssemblyCodeGen
+parent = WebAssembly
+required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target WebAssemblyDesc WebAssemblyInfo
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..ccc0f0d
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMWebAssemblyDesc
+  WebAssemblyMCAsmInfo.cpp
+  WebAssemblyMCTargetDesc.cpp
+)
diff --git a/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt b/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..ce7cb5d
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/WebAssembly/MCTargetDesc/LLVMBuild.txt ------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyDesc
+parent = WebAssembly
+required_libraries = MC Support WebAssemblyAsmPrinter WebAssemblyInfo
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/MCTargetDesc/Makefile b/lib/Target/WebAssembly/MCTargetDesc/Makefile
new file mode 100644
index 0000000..11dcb4f
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/WebAssembly/TargetDesc/Makefile ----------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
new file mode 100644
index 0000000..55346f7
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -0,0 +1,53 @@
+//===-- WebAssemblyMCAsmInfo.cpp - WebAssembly asm properties -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declarations of the WebAssemblyMCAsmInfo
+/// properties.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-mc-asm-info"
+
+WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+
+WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
+  PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit();
+
+  // TODO: What should MaxInstLength be?
+
+  PrivateGlobalPrefix = "";
+  PrivateLabelPrefix = "";
+
+  UseDataRegionDirectives = true;
+
+  Data8bitsDirective = "\t.int8\t";
+  Data16bitsDirective = "\t.int16\t";
+  Data32bitsDirective = "\t.int32\t";
+  Data64bitsDirective = "\t.int64\t";
+
+  AlignmentIsInBytes = false;
+  COMMDirectiveAlignmentIsInBytes = false;
+  LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
+
+  HasDotTypeDotSizeDirective = false;
+  HasSingleParameterDotFile = false;
+
+  SupportsDebugInformation = true;
+
+  // For now, WebAssembly does not support exceptions.
+  ExceptionsType = ExceptionHandling::None;
+
+  // TODO: UseIntegratedAssembler?
+}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
new file mode 100644
index 0000000..d2b8fb7
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -0,0 +1,32 @@
+//===-- WebAssemblyMCAsmInfo.h - WebAssembly asm properties -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the WebAssemblyMCAsmInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+class Triple;
+
+class WebAssemblyMCAsmInfo final : public MCAsmInfo {
+public:
+  explicit WebAssemblyMCAsmInfo(const Triple &T);
+  ~WebAssemblyMCAsmInfo() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
new file mode 100644
index 0000000..d248556
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- WebAssemblyMCTargetDesc.cpp - WebAssembly Target Descriptions -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file provides WebAssembly-specific target descriptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMCTargetDesc.h"
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "WebAssemblyMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-mc-target-desc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI,
+                                             const Triple &TT) {
+  MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT);
+  return MAI;
+}
+
+static MCInstPrinter *
+createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant,
+                               const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                               const MCRegisterInfo &MRI) {
+  if (SyntaxVariant == 0 || SyntaxVariant == 1)
+    return new WebAssemblyInstPrinter(MAI, MII, MRI);
+  return nullptr;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeWebAssemblyTargetMC() {
+  for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) {
+    // Register the MC asm info.
+    RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo);
+
+    // Register the MCInstPrinter.
+    TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter);
+  }
+}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
new file mode 100644
index 0000000..24893da
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -0,0 +1,53 @@
+//==- WebAssemblyMCTargetDesc.h - WebAssembly Target Descriptions -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file provides WebAssembly-specific target descriptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+
+class formatted_raw_ostream;
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+class MCObjectWriter;
+class MCStreamer;
+class MCSubtargetInfo;
+class MCTargetStreamer;
+class StringRef;
+class Target;
+class Triple;
+class raw_ostream;
+
+extern Target TheWebAssemblyTarget32;
+extern Target TheWebAssemblyTarget64;
+
+MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
+                                          const MCRegisterInfo &MRI,
+                                          StringRef TT, StringRef CPU);
+
+} // end namespace llvm
+
+// Defines symbolic names for WebAssembly registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_SUBTARGETINFO_ENUM
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile
new file mode 100644
index 0000000..35d835c
--- /dev/null
+++ b/lib/Target/WebAssembly/Makefile
@@ -0,0 +1,19 @@
+##===- lib/Target/WebAssembly/Makefile ---------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMWebAssemblyCodeGen
+TARGET = WebAssembly
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = WebAssemblyGenSubtargetInfo.inc WebAssemblyGenMCCodeEmitter.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
new file mode 100644
index 0000000..7a71060
--- /dev/null
+++ b/lib/Target/WebAssembly/README.txt
@@ -0,0 +1,15 @@
+//===-- README.txt - Notes for WebAssembly code gen -----------------------===//
+
+This WebAssembly backend is presently in a very early stage of development.
+The code should build and not break anything else, but don't expect a lot more
+at this point.
+
+For more information on WebAssembly itself, see the design documents:
+  * https://github.com/WebAssembly/design/blob/master/README.md
+
+The following documents contain some information on the planned semantics and
+binary encoding of WebAssembly itself:
+  * https://github.com/WebAssembly/design/blob/master/AstSemantics.md
+  * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt b/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..ef6e4d2
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMWebAssemblyInfo
+  WebAssemblyTargetInfo.cpp
+  )
+
+add_dependencies(LLVMWebAssemblyInfo WebAssemblyCommonTableGen)
diff --git a/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt b/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..f4da923
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/WebAssembly/TargetInfo/LLVMBuild.txt --------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyInfo
+parent = WebAssembly
+required_libraries = Support
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/TargetInfo/Makefile b/lib/Target/WebAssembly/TargetInfo/Makefile
new file mode 100644
index 0000000..b021eb6
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/WebAssembly/TargetInfo/Makefile ----------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
new file mode 100644
index 0000000..ddb1eb1
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -0,0 +1,30 @@
+//===-- WebAssemblyTargetInfo.cpp - WebAssembly Target Implementation -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file registers the WebAssembly target.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-target-info"
+
+Target llvm::TheWebAssemblyTarget32;
+Target llvm::TheWebAssemblyTarget64;
+
+extern "C" void LLVMInitializeWebAssemblyTargetInfo() {
+  RegisterTarget<Triple::wasm32> X(TheWebAssemblyTarget32, "wasm32",
+                                   "WebAssembly 32-bit");
+  RegisterTarget<Triple::wasm64> Y(TheWebAssemblyTarget64, "wasm64",
+                                   "WebAssembly 64-bit");
+}
diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h
new file mode 100644
index 0000000..3ff19d4
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssembly.h
@@ -0,0 +1,31 @@
+//===-- WebAssembly.h - Top-level interface for WebAssembly  ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the entry points for global functions defined in
+/// the LLVM WebAssembly back-end.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H
+
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+
+class WebAssemblyTargetMachine;
+class FunctionPass;
+
+FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td
new file mode 100644
index 0000000..a123bf6
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssembly.td
@@ -0,0 +1,62 @@
+//- WebAssembly.td - Describe the WebAssembly Target Machine --*- tablegen -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the WebAssembly architecture, which is
+// also known as "wasm".
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "false",
+                                      "Enable 128-bit SIMD">;
+
+//===----------------------------------------------------------------------===//
+// Architectures.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyInstrInfo.td"
+
+def WebAssemblyInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Processors supported.
+//===----------------------------------------------------------------------===//
+
+// Minimal Viable Product.
+def : ProcessorModel<"mvp", NoSchedModel, []>;
+
+// Latest and greatest experimental version of WebAssembly. Bugs included!
+def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>;
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def WebAssembly : Target {
+  let InstructionSet = WebAssemblyInstrInfo;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
new file mode 100644
index 0000000..e4ca82e
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -0,0 +1,74 @@
+//===-- WebAssemblyFrameLowering.cpp - WebAssembly Frame Lowering ----------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of
+/// TargetFrameLowering class.
+///
+/// On WebAssembly, there aren't a lot of things to do here. There are no
+/// callee-saved registers to save, and no spill slots.
+///
+/// The stack grows downward.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyFrameLowering.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyInstrInfo.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-frame-info"
+
+// TODO: Implement a red zone?
+
+/// Return true if the specified function should have a dedicated frame pointer
+/// register.
+bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
+  llvm_unreachable("TODO: implement hasFP");
+}
+
+/// Under normal circumstances, when a frame pointer is not required, we reserve
+/// argument space for call sites in the function immediately on entry to the
+/// current function. This eliminates the need for add/sub sp brackets around
+/// call sites. Returns true if the call frame is included as part of the stack
+/// frame.
+bool WebAssemblyFrameLowering::hasReservedCallFrame(
+    const MachineFunction &MF) const {
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator I) const {
+  llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr");
+}
+
+void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
+                                            MachineBasicBlock &MBB) const {
+  llvm_unreachable("TODO: implement emitPrologue");
+}
+
+void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
+                                            MachineBasicBlock &MBB) const {
+  llvm_unreachable("TODO: implement emitEpilogue");
+}
+
+void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan(
+    MachineFunction &MF, RegScavenger *RS) const {
+  llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan");
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
new file mode 100644
index 0000000..0b112d0
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -0,0 +1,48 @@
+// WebAssemblyFrameLowering.h - TargetFrameLowering for WebAssembly -*- C++ -*-/
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This class implements WebAssembly-specific bits of
+/// TargetFrameLowering class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYFRAMELOWERING_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class WebAssemblyFrameLowering final : public TargetFrameLowering {
+public:
+  WebAssemblyFrameLowering()
+      : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16,
+                            /*LocalAreaOffset=*/0,
+                            /*TransientStackAlignment=*/16,
+                            /*StackRealignable=*/true) {}
+
+  void
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) const override;
+
+  /// These methods insert prolog and epilog code into the function.
+  void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+  bool hasFP(const MachineFunction &MF) const override;
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
new file mode 100644
index 0000000..518ef33
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -0,0 +1,73 @@
+//- WebAssemblyISelDAGToDAG.cpp - A dag to dag inst selector for WebAssembly -//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines an instruction selector for the WebAssembly target.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h" // To access function attributes.
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-isel"
+
+//===--------------------------------------------------------------------===//
+/// WebAssembly-specific code to select WebAssembly machine instructions for
+/// SelectionDAG operations.
+///
+namespace {
+class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
+  /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+  /// right decision when generating code for different targets.
+  const WebAssemblySubtarget *Subtarget;
+
+  bool ForCodeSize;
+
+public:
+  WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &tm,
+                          CodeGenOpt::Level OptLevel)
+      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
+  }
+
+  const char *getPassName() const override {
+    return "WebAssembly Instruction Selection";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    ForCodeSize =
+        MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
+        MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+    Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
+    return SelectionDAGISel::runOnMachineFunction(MF);
+  }
+
+  SDNode *Select(SDNode *Node) override;
+
+private:
+  // add select functions here...
+};
+} // end anonymous namespace
+
+SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
+  llvm_unreachable("TODO: implement Select");
+}
+
+/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
+/// for instruction scheduling.
+FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
+                                             CodeGenOpt::Level OptLevel) {
+  return new WebAssemblyDAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
new file mode 100644
index 0000000..4eec02e
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -0,0 +1,63 @@
+//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblyTargetLowering class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyISelLowering.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyTargetObjectFile.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower"
+
+WebAssemblyTargetLowering::WebAssemblyTargetLowering(
+    const TargetMachine &TM, const WebAssemblySubtarget &STI)
+    : TargetLowering(TM), Subtarget(&STI) {
+  // WebAssembly does not produce floating-point exceptions on normal floating
+  // point operations.
+  setHasFloatingPointExceptions(false);
+}
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Lowering private implementation.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//                          WebAssembly Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal(
+    const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+    const TargetMachine &TM) const {
+  return getDataSection();
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
new file mode 100644
index 0000000..efd60a7
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -0,0 +1,49 @@
+//- WebAssemblyISelLowering.h - WebAssembly DAG Lowering Interface -*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the interfaces that WebAssembly uses to lower LLVM
+/// code into a selection DAG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+namespace WebAssemblyISD {
+
+enum {
+  FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+  // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
+};
+
+} // end namespace WebAssemblyISD
+
+class WebAssemblySubtarget;
+class WebAssemblyTargetMachine;
+
+class WebAssemblyTargetLowering final : public TargetLowering {
+public:
+  WebAssemblyTargetLowering(const TargetMachine &TM,
+                            const WebAssemblySubtarget &STI);
+
+private:
+  /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+  /// right decision when generating code for different targets.
+  const WebAssemblySubtarget *Subtarget;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
new file mode 100644
index 0000000..35e88ee
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -0,0 +1,46 @@
+// WebAssemblyInstrAtomics.td-WebAssembly Atomic codegen support-*- tablegen -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly Atomic operand code-gen constructs.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: Implement atomic instructions.
+
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// TODO: add atomic fences here...
+
+//===----------------------------------------------------------------------===//
+// Atomic loads
+//===----------------------------------------------------------------------===//
+
+// TODO: add atomic loads here...
+
+//===----------------------------------------------------------------------===//
+// Atomic stores
+//===----------------------------------------------------------------------===//
+
+// TODO: add atomic stores here...
+
+//===----------------------------------------------------------------------===//
+// Low-level exclusive operations
+//===----------------------------------------------------------------------===//
+
+// TODO: add exclusive operations here...
+
+// Load-exclusives.
+
+// Store-exclusives.
+
+// Store-release-exclusives.
+
+// And clear exclusive.
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
new file mode 100644
index 0000000..8bbf3e9
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -0,0 +1,28 @@
+// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly instruction format definitions.
+//
+//===----------------------------------------------------------------------===//
+
+// WebAssembly Instruction Format
+class WebAssemblyInst<string cstr> : Instruction {
+  field bits<0> Inst; // Instruction encoding.
+  let Namespace   = "WebAssembly";
+  let Pattern     = [];
+  let Constraints = cstr;
+}
+
+// Normal instructions
+class I<dag oops, dag iops, list<dag> pattern, string cstr = "">
+    : WebAssemblyInst<cstr> {
+  dag OutOperandList = oops;
+  dag InOperandList  = iops;
+  let Pattern        = pattern;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
new file mode 100644
index 0000000..ea8937c
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -0,0 +1,28 @@
+//===-- WebAssemblyInstrInfo.cpp - WebAssembly Instruction Information ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// TargetInstrInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyInstrInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-instr-info"
+
+WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
+    : RI(STI.getTargetTriple()) {}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
new file mode 100644
index 0000000..1c4ae22
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -0,0 +1,37 @@
+//=- WebAssemblyInstrInfo.h - WebAssembly Instruction Information -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// TargetInstrInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYINSTRINFO_H
+
+#include "WebAssemblyRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+class WebAssemblySubtarget;
+
+class WebAssemblyInstrInfo final {
+  const WebAssemblyRegisterInfo RI;
+
+public:
+  explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI);
+
+  const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
new file mode 100644
index 0000000..142eccf
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -0,0 +1,46 @@
+// WebAssemblyInstrInfo.td-Describe the WebAssembly Instructions-*- tablegen -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly Instruction definitions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+
+def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">;
+def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
+def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
+                           AssemblerPredicate<"FeatureSIMD128", "simd128">;
+
+//===----------------------------------------------------------------------===//
+// WebAssembly-specific DAG Node Types.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly-specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly-specific Operands.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Instruction Format Definitions.
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Additional sets of instructions.
+//===----------------------------------------------------------------------===//
+
+include "WebAssemblyInstrAtomics.td"
+include "WebAssemblyInstrSIMD.td"
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
new file mode 100644
index 0000000..e25483a
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -0,0 +1,15 @@
+// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// WebAssembly SIMD operand code-gen constructs.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: Implement SIMD instructions.
+// Note: use Requires<[HasSIMD128]>.
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
new file mode 100644
index 0000000..542d984
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -0,0 +1,19 @@
+//=- WebAssemblyMachineFunctionInfo.cpp - WebAssembly Machine Function Info -=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements WebAssembly-specific per-machine-function
+/// information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMachineFunctionInfo.h"
+using namespace llvm;
+
+WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {}
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
new file mode 100644
index 0000000..fc5e910
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -0,0 +1,37 @@
+// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares WebAssembly-specific per-machine-function
+/// information.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
+
+#include "WebAssemblyRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+namespace llvm {
+
+/// This class is derived from MachineFunctionInfo and contains private
+/// WebAssembly-specific information for each MachineFunction.
+class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
+  MachineFunction &MF;
+
+public:
+  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+  ~WebAssemblyFunctionInfo() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
new file mode 100644
index 0000000..ad24c90
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -0,0 +1,33 @@
+//===-- WebAssemblyRegisterInfo.cpp - WebAssembly Register Information ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// TargetRegisterInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyRegisterInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyFrameLowering.h"
+#include "WebAssemblyInstrInfo.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-info"
+
+WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) : TT(TT) {}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
new file mode 100644
index 0000000..5530028
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -0,0 +1,35 @@
+// WebAssemblyRegisterInfo.h - WebAssembly Register Information Impl -*- C++ -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the WebAssembly implementation of the
+/// WebAssemblyRegisterInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H
+
+namespace llvm {
+
+class MachineFunction;
+class RegScavenger;
+class TargetRegisterClass;
+class Triple;
+
+class WebAssemblyRegisterInfo final {
+  const Triple &TT;
+
+public:
+  explicit WebAssemblyRegisterInfo(const Triple &TT);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
new file mode 100644
index 0000000..7b3d636
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -0,0 +1,28 @@
+//WebAssemblyRegisterInfo.td-Describe the WebAssembly Registers -*- tablegen -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the WebAssembly register classes and some nominal
+// physical registers.
+//
+//===----------------------------------------------------------------------===//
+
+class WebAssemblyReg<string n> : Register<n> {
+  let Namespace = "WebAssembly";
+}
+
+class WebAssemblyRegClass<list<ValueType> regTypes, int alignment, dag regList>
+     : RegisterClass<"WebAssembly", regTypes, alignment, regList>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Register classes
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
new file mode 100644
index 0000000..cfd1baf
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- WebAssemblySelectionDAGInfo.cpp - WebAssembly SelectionDAG Info ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblySelectionDAGInfo class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-selectiondag-info"
+
+WebAssemblySelectionDAGInfo::WebAssemblySelectionDAGInfo(const DataLayout *DL)
+    : TargetSelectionDAGInfo(DL) {}
+
+WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {}
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
new file mode 100644
index 0000000..03e8d39
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//=- WebAssemblySelectionDAGInfo.h - WebAssembly SelectionDAG Info -*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly subclass for
+/// TargetSelectionDAGInfo.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class WebAssemblySelectionDAGInfo final : public TargetSelectionDAGInfo {
+public:
+  explicit WebAssemblySelectionDAGInfo(const DataLayout *DL);
+  ~WebAssemblySelectionDAGInfo() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
new file mode 100644
index 0000000..addea8e
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -0,0 +1,48 @@
+//===-- WebAssemblySubtarget.cpp - WebAssembly Subtarget Information ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssembly-specific subclass of
+/// TargetSubtarget.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyInstrInfo.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-subtarget"
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+WebAssemblySubtarget &
+WebAssemblySubtarget::initializeSubtargetDependencies(StringRef FS) {
+  // Determine default and user-specified characteristics
+
+  if (CPUString.empty())
+    CPUString = "generic";
+
+  ParseSubtargetFeatures(CPUString, FS);
+  return *this;
+}
+
+WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
+                                           const std::string &CPU,
+                                           const std::string &FS,
+                                           const TargetMachine &TM)
+    : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false),
+      CPUString(CPU), TargetTriple(TT), FrameLowering(),
+      InstrInfo(initializeSubtargetDependencies(FS)),
+      TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
+
+bool WebAssemblySubtarget::enableMachineScheduler() const { return true; }
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h
new file mode 100644
index 0000000..6f17619
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -0,0 +1,79 @@
+//=- WebAssemblySubtarget.h - Define Subtarget for the WebAssembly -*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the WebAssembly-specific subclass of
+/// TargetSubtarget.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSUBTARGET_H
+
+#include "WebAssemblyFrameLowering.h"
+#include "WebAssemblyISelLowering.h"
+#include "WebAssemblyInstrInfo.h"
+#include "WebAssemblySelectionDAGInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "WebAssemblyGenSubtargetInfo.inc"
+
+namespace llvm {
+
+class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
+  bool HasSIMD128;
+
+  /// String name of used CPU.
+  std::string CPUString;
+
+  /// What processor and OS we're targeting.
+  Triple TargetTriple;
+
+  WebAssemblyFrameLowering FrameLowering;
+  WebAssemblyInstrInfo InstrInfo;
+  WebAssemblySelectionDAGInfo TSInfo;
+  WebAssemblyTargetLowering TLInfo;
+
+  /// Initializes using CPUString and the passed in feature string so that we
+  /// can use initializer lists for subtarget initialization.
+  WebAssemblySubtarget &initializeSubtargetDependencies(StringRef FS);
+
+public:
+  /// This constructor initializes the data members to match that
+  /// of the specified triple.
+  WebAssemblySubtarget(const Triple &TT, const std::string &CPU,
+                       const std::string &FS, const TargetMachine &TM);
+
+  const WebAssemblySelectionDAGInfo *getSelectionDAGInfo() const override {
+    return &TSInfo;
+  }
+  const WebAssemblyFrameLowering *getFrameLowering() const override {
+    return &FrameLowering;
+  }
+  const WebAssemblyTargetLowering *getTargetLowering() const override {
+    return &TLInfo;
+  }
+  const Triple &getTargetTriple() const { return TargetTriple; }
+  bool enableMachineScheduler() const override;
+  bool useAA() const override { return true; }
+
+  // Predicates used by WebAssemblyInstrInfo.td.
+  bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
+  bool hasSIMD128() const { return HasSIMD128; }
+
+  /// Parses features string setting specified subtarget options. Definition of
+  /// function is auto generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
new file mode 100644
index 0000000..6f93248
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -0,0 +1,173 @@
+//===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific subclass of TargetMachine.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm"
+
+extern "C" void LLVMInitializeWebAssemblyTarget() {
+  // Register the target.
+  RegisterTargetMachine<WebAssemblyTargetMachine> X(TheWebAssemblyTarget32);
+  RegisterTargetMachine<WebAssemblyTargetMachine> Y(TheWebAssemblyTarget64);
+}
+
+//===----------------------------------------------------------------------===//
+// WebAssembly Lowering public interface.
+//===----------------------------------------------------------------------===//
+
+/// Create an WebAssembly architecture model.
+///
+WebAssemblyTargetMachine::WebAssemblyTargetMachine(
+    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL)
+    : LLVMTargetMachine(T, TT.isArch64Bit()
+                               ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128"
+                               : "e-p:32:32-i64:64-v128:8:128-n32:64-S128",
+                        TT, CPU, FS, Options, RM, CM, OL),
+      TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+  initAsmInfo();
+
+  // We need a reducible CFG, so disable some optimizations which tend to
+  // introduce irreducibility.
+  setRequiresStructuredCFG(true);
+}
+
+WebAssemblyTargetMachine::~WebAssemblyTargetMachine() {}
+
+const WebAssemblySubtarget *
+WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
+  Attribute CPUAttr = F.getFnAttribute("target-cpu");
+  Attribute FSAttr = F.getFnAttribute("target-features");
+
+  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+                        ? CPUAttr.getValueAsString().str()
+                        : TargetCPU;
+  std::string FS = !FSAttr.hasAttribute(Attribute::None)
+                       ? FSAttr.getValueAsString().str()
+                       : TargetFS;
+
+  auto &I = SubtargetMap[CPU + FS];
+  if (!I) {
+    // This needs to be done before we create a new subtarget since any
+    // creation will depend on the TM and the code generation flags on the
+    // function that reside in TargetOptions.
+    resetTargetOptions(F);
+    I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+  }
+  return I.get();
+}
+
+namespace {
+/// WebAssembly Code Generator Pass Configuration Options.
+class WebAssemblyPassConfig final : public TargetPassConfig {
+public:
+  WebAssemblyPassConfig(WebAssemblyTargetMachine *TM, PassManagerBase &PM)
+      : TargetPassConfig(TM, PM) {}
+
+  WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const {
+    return getTM<WebAssemblyTargetMachine>();
+  }
+
+  FunctionPass *createTargetRegisterAllocator(bool) override;
+  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
+  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+  void addIRPasses() override;
+  bool addPreISel() override;
+  bool addInstSelector() override;
+  bool addILPOpts() override;
+  void addPreRegAlloc() override;
+  void addRegAllocPasses(bool Optimized);
+  void addPostRegAlloc() override;
+  void addPreSched2() override;
+  void addPreEmitPass() override;
+};
+} // end anonymous namespace
+
+TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
+  return TargetIRAnalysis([this](Function &F) {
+    return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
+  });
+}
+
+TargetPassConfig *
+WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new WebAssemblyPassConfig(this, PM);
+}
+
+FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
+  return nullptr; // No reg alloc
+}
+
+void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+  assert(!RegAllocPass && "WebAssembly uses no regalloc!");
+  addRegAllocPasses(false);
+}
+
+void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+  assert(!RegAllocPass && "WebAssembly uses no regalloc!");
+  addRegAllocPasses(true);
+}
+
+//===----------------------------------------------------------------------===//
+// The following functions are called from lib/CodeGen/Passes.cpp to modify
+// the CodeGen pass sequence.
+//===----------------------------------------------------------------------===//
+
+void WebAssemblyPassConfig::addIRPasses() {
+  // FIXME: the default for this option is currently POSIX, whereas
+  // WebAssembly's MVP should default to Single.
+  if (TM->Options.ThreadModel == ThreadModel::Single)
+    addPass(createLowerAtomicPass());
+  else
+    // Expand some atomic operations. WebAssemblyTargetLowering has hooks which
+    // control specifically what gets lowered.
+    addPass(createAtomicExpandPass(TM));
+
+  TargetPassConfig::addIRPasses();
+}
+
+bool WebAssemblyPassConfig::addPreISel() { return false; }
+
+bool WebAssemblyPassConfig::addInstSelector() {
+  addPass(
+      createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
+  return false;
+}
+
+bool WebAssemblyPassConfig::addILPOpts() { return true; }
+
+void WebAssemblyPassConfig::addPreRegAlloc() {}
+
+void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {}
+
+void WebAssemblyPassConfig::addPostRegAlloc() {}
+
+void WebAssemblyPassConfig::addPreSched2() {}
+
+void WebAssemblyPassConfig::addPreEmitPass() {}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
new file mode 100644
index 0000000..3226edc
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -0,0 +1,51 @@
+// WebAssemblyTargetMachine.h - Define TargetMachine for WebAssembly -*- C++ -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the WebAssembly-specific subclass of
+/// TargetMachine.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETMACHINE_H
+
+#include "WebAssemblySubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class WebAssemblyTargetMachine final : public LLVMTargetMachine {
+  std::unique_ptr<TargetLoweringObjectFile> TLOF;
+  mutable StringMap<std::unique_ptr<WebAssemblySubtarget>> SubtargetMap;
+
+public:
+  WebAssemblyTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+                           StringRef FS, const TargetOptions &Options,
+                           Reloc::Model RM, CodeModel::Model CM,
+                           CodeGenOpt::Level OL);
+
+  ~WebAssemblyTargetMachine() override;
+  const WebAssemblySubtarget *
+  getSubtargetImpl(const Function &F) const override;
+
+  // Pass Pipeline Configuration
+  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+  TargetLoweringObjectFile *getObjFileLowering() const override {
+    return TLOF.get();
+  }
+
+  /// \brief Get the TargetIRAnalysis for this target.
+  TargetIRAnalysis getTargetIRAnalysis() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
new file mode 100644
index 0000000..ee78b94
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -0,0 +1,67 @@
+//===-- WebAssemblyTargetObjectFile.h - WebAssembly Object Info -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the WebAssembly-specific subclass of
+/// TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
+
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+class GlobalVariable;
+
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile {
+public:
+  WebAssemblyTargetObjectFile() {
+    TextSection = nullptr;
+    DataSection = nullptr;
+    BSSSection = nullptr;
+    ReadOnlySection = nullptr;
+
+    StaticCtorSection = nullptr;
+    StaticDtorSection = nullptr;
+    LSDASection = nullptr;
+    EHFrameSection = nullptr;
+    DwarfAbbrevSection = nullptr;
+    DwarfInfoSection = nullptr;
+    DwarfLineSection = nullptr;
+    DwarfFrameSection = nullptr;
+    DwarfPubTypesSection = nullptr;
+    DwarfDebugInlineSection = nullptr;
+    DwarfStrSection = nullptr;
+    DwarfLocSection = nullptr;
+    DwarfARangesSection = nullptr;
+    DwarfRangesSection = nullptr;
+  }
+
+  MCSection *getSectionForConstant(SectionKind Kind,
+                                   const Constant *C) const override {
+    return ReadOnlySection;
+  }
+
+  MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+                                      Mangler &Mang,
+                                      const TargetMachine &TM) const override {
+    return DataSection;
+  }
+
+  MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                                    Mangler &Mang,
+                                    const TargetMachine &TM) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
new file mode 100644
index 0000000..fa88ed5
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -0,0 +1,28 @@
+//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific TargetTransformInfo
+/// implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasmtti"
+
+TargetTransformInfo::PopcntSupportKind
+WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // TODO: Make Math.popcount32 happen in WebAssembly.
+  return TTI::PSK_Software;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
new file mode 100644
index 0000000..08bd88c
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -0,0 +1,87 @@
+//==- WebAssemblyTargetTransformInfo.h - WebAssembly-specific TTI -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file a TargetTransformInfo::Concept conforming object specific
+/// to the WebAssembly target machine.
+///
+/// It uses the target's detailed information to provide more precise answers to
+/// certain TTI queries, while letting the target independent and default TTI
+/// implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETTRANSFORMINFO_H
+
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include <algorithm>
+
+namespace llvm {
+
+class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
+  typedef BasicTTIImplBase<WebAssemblyTTIImpl> BaseT;
+  typedef TargetTransformInfo TTI;
+  friend BaseT;
+
+  const WebAssemblyTargetMachine *TM;
+  const WebAssemblySubtarget *ST;
+  const WebAssemblyTargetLowering *TLI;
+
+  const WebAssemblySubtarget *getST() const { return ST; }
+  const WebAssemblyTargetLowering *getTLI() const { return TLI; }
+
+public:
+  WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F)
+      : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)),
+        TLI(ST->getTargetLowering()) {}
+
+  // Provide value semantics. MSVC requires that we spell all of these out.
+  WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg)
+      : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST),
+        TLI(Arg.TLI) {}
+  WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg)
+      : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)),
+        ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {}
+  WebAssemblyTTIImpl &operator=(const WebAssemblyTTIImpl &RHS) {
+    BaseT::operator=(static_cast<const BaseT &>(RHS));
+    TM = RHS.TM;
+    ST = RHS.ST;
+    TLI = RHS.TLI;
+    return *this;
+  }
+  WebAssemblyTTIImpl &operator=(WebAssemblyTTIImpl &&RHS) {
+    BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+    TM = std::move(RHS.TM);
+    ST = std::move(RHS.ST);
+    TLI = std::move(RHS.TLI);
+    return *this;
+  }
+
+  /// \name Scalar TTI Implementations
+  /// @{
+
+  // TODO: Implement more Scalar TTI for WebAssembly
+
+  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  // TODO: Implement Vector TTI for WebAssembly
+
+  /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 6ba897b..9eee4a0 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -1080,4 +1080,4 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
   return new X86AsmInstrumentation(STI);
 }
 
-} // namespace llvm
+} // End llvm namespace
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 341fc81..19ebcc4 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -61,6 +61,6 @@ protected:
   unsigned InitialFrameReg;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index b3066ef..7ec0240 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -238,18 +238,34 @@ struct X86Operand : public MCParsedAsmOperand {
     return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
       getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
   }
+  bool isMemVX32X() const {
+    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
+  }
   bool isMemVY32() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
       getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
   }
+  bool isMemVY32X() const {
+    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
+      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
+  }
   bool isMemVX64() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
       getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
   }
+  bool isMemVX64X() const {
+    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
+  }
   bool isMemVY64() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
       getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
   }
+  bool isMemVY64X() const {
+    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
+      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
+  }
   bool isMemVZ32() const {
     return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
       getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 5b53fbe..cfc3ee2 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -69,7 +69,7 @@ namespace X86 {
 
 extern Target TheX86_32Target, TheX86_64Target;
 
-} // namespace llvm
+}
 
 static bool translateInstruction(MCInst &target,
                                 InternalInstruction &source,
@@ -551,9 +551,15 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
   case TYPE_REL8:
     isBranch = true;
     pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
-    if(immediate & 0x80)
+    if (immediate & 0x80)
       immediate |= ~(0xffull);
     break;
+  case TYPE_REL16:
+    isBranch = true;
+    pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+    if (immediate & 0x8000)
+      immediate |= ~(0xffffull);
+    break;
   case TYPE_REL32:
   case TYPE_REL64:
     isBranch = true;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index d990bf3..f73fa75 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1165,35 +1165,30 @@ static int readSIB(struct InternalInstruction* insn) {
     return -1;
 
   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+  // FIXME: The fifth bit (bit index 4) is only to be used for instructions
+  // that understand VSIB indexing. ORing the bit in here is mildy dangerous
+  // because performing math on an 'enum SIBIndex' can produce garbage.
+  // Excluding the "none" value, it should cover 6 spaces of register names:
+  //   - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI
+  //   - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX
+  //   - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX
+  //   - 32 possibilities for each of XMM, YMM, ZMM registers
+  // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode,
+  // summing in a fully decoded index between 0 and 31 can end up with a value
+  // that looks like something in the low half of the XMM range.
+  // translateRMMemory() tries to reverse the damage, with only partial success,
+  // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt"
   if (insn->vectorExtensionType == TYPE_EVEX)
     index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
 
-  switch (index) {
-  case 0x4:
+  if (index == 0x4) {
     insn->sibIndex = SIB_INDEX_NONE;
-    break;
-  default:
+  } else {
     insn->sibIndex = (SIBIndex)(sibIndexBase + index);
-    if (insn->sibIndex == SIB_INDEX_sib ||
-        insn->sibIndex == SIB_INDEX_sib64)
-      insn->sibIndex = SIB_INDEX_NONE;
-    break;
   }
 
-  switch (scaleFromSIB(insn->sib)) {
-  case 0:
-    insn->sibScale = 1;
-    break;
-  case 1:
-    insn->sibScale = 2;
-    break;
-  case 2:
-    insn->sibScale = 4;
-    break;
-  case 3:
-    insn->sibScale = 8;
-    break;
-  }
+  insn->sibScale = 1 << scaleFromSIB(insn->sib);
 
   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
 
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index ac484f3..62b6b73 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -140,6 +140,6 @@ public:
 private:
   bool HasCustomInstComment;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 2bee518..6e371da 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -159,6 +159,6 @@ public:
   }
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 2d85f84..3e0dc14 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -29,13 +29,6 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-// Option to allow disabling arithmetic relaxation to workaround PR9807, which
-// is useful when running bitwise comparison experiments on Darwin. We should be
-// able to remove this once PR9807 is resolved.
-static cl::opt<bool>
-MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
-         cl::desc("Disable relaxation of arithmetic instruction for X86"));
-
 static unsigned getFixupKindLog2Size(unsigned Kind) {
   switch (Kind) {
   default:
@@ -243,29 +236,18 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
   if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
     return true;
 
-  if (MCDisableArithRelaxation)
-    return false;
-
   // Check if this instruction is ever relaxable.
   if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
     return false;
 
 
-  // Check if it has an expression and is not RIP relative.
-  bool hasExp = false;
-  bool hasRIP = false;
-  for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
-    const MCOperand &Op = Inst.getOperand(i);
-    if (Op.isExpr())
-      hasExp = true;
-
-    if (Op.isReg() && Op.getReg() == X86::RIP)
-      hasRIP = true;
-  }
+  // Check if the relaxable operand has an expression. For the current set of
+  // relaxable instructions, the relaxable operand is always the last operand.
+  unsigned RelaxableOp = Inst.getNumOperands() - 1;
+  if (Inst.getOperand(RelaxableOp).isExpr())
+    return true;
 
-  // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
-  // how we do relaxations?
-  return hasExp && !hasRIP;
+  return false;
 }
 
 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
@@ -426,7 +408,7 @@ namespace CU {
     UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
   };
 
-} // namespace CU
+} // end CU namespace
 
 class DarwinX86AsmBackend : public X86AsmBackend {
   const MCRegisterInfo &MRI;
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 69e9c7b..f0d00b0 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -41,7 +41,7 @@ namespace X86 {
     /// AddrNumOperands - Total number of operands in a memory reference.
     AddrNumOperands = 5
   };
-} // namespace X86
+} // end namespace X86;
 
 /// X86II - This namespace holds all of the target specific flags that
 /// instruction info tracks.
@@ -213,11 +213,7 @@ namespace X86II {
     /// the offset from beginning of section.
     ///
     /// This is the TLS offset for the COFF/Windows TLS mechanism.
-    MO_SECREL,
-
-    /// MO_NOPREFIX - On a symbol operand this indicates that the symbol should
-    /// not be mangled with a prefix.
-    MO_NOPREFIX,
+    MO_SECREL
   };
 
   enum : uint64_t {
@@ -762,8 +758,8 @@ namespace X86II {
     return (reg == X86::SPL || reg == X86::BPL ||
             reg == X86::SIL || reg == X86::DIL);
   }
-} // namespace X86II
+}
 
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 512afeb..a33468d 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -28,7 +28,7 @@ namespace {
     unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                           bool IsPCRel) const override;
   };
-} // namespace
+}
 
 X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
                                        uint16_t EMachine)
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 7c09e5d..89f3945 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -26,14 +26,17 @@ public:
   X86_64ELFRelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
 
   const MCExpr *createExprForRelocation(RelocationRef Rel) override {
-    uint64_t RelType; Rel.getType(RelType);
-    symbol_iterator SymI = Rel.getSymbol();
+    uint64_t RelType = Rel.getType();
+    elf_symbol_iterator SymI = Rel.getSymbol();
+
+    ErrorOr<StringRef> SymNameOrErr = SymI->getName();
+    if (std::error_code EC = SymNameOrErr.getError())
+      report_fatal_error(EC.message());
+    StringRef SymName = *SymNameOrErr;
 
-    StringRef SymName; SymI->getName(SymName);
     uint64_t  SymAddr; SymI->getAddress(SymAddr);
     uint64_t SymSize = SymI->getSize();
-    auto *Obj = cast<ELFObjectFileBase>(Rel.getObjectFile());
-    int64_t Addend = *Obj->getRelocationAddend(Rel.getRawDataRefImpl());
+    int64_t Addend = *ELFRelocationRef(Rel).getAddend();
 
     MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
     // FIXME: check that the value is actually the same.
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index a523a32..4899900 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -28,7 +28,7 @@ enum Fixups {
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
 };
-} // namespace X86
-} // namespace llvm
+}
+}
 
 #endif
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 020803b..6221bab 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -62,7 +62,7 @@ void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
 /// do not need to go through TargetRegistry.
 MCSubtargetInfo *createX86MCSubtargetInfo(const Triple &TT, StringRef CPU,
                                           StringRef FS);
-} // namespace X86_MC
+}
 
 MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
                                       const MCRegisterInfo &MRI,
@@ -98,7 +98,7 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
 
 /// Construct X86-64 ELF relocation info.
 MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
-} // namespace llvm
+} // End llvm namespace
 
 
 // Defines symbolic names for X86 registers.  This defines a mapping from
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index a5aadd6..c9479b6 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -25,12 +25,15 @@ public:
   X86_64MachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
 
   const MCExpr *createExprForRelocation(RelocationRef Rel) override {
-    const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObjectFile());
+    const MachOObjectFile *Obj = cast<MachOObjectFile>(Rel.getObject());
 
-    uint64_t RelType; Rel.getType(RelType);
+    uint64_t RelType = Rel.getType();
     symbol_iterator SymI = Rel.getSymbol();
 
-    StringRef SymName; SymI->getName(SymName);
+    ErrorOr<StringRef> SymNameOrErr = SymI->getName();
+    if (std::error_code EC = SymNameOrErr.getError())
+      report_fatal_error(EC.message());
+    StringRef SymName = *SymNameOrErr;
     uint64_t  SymAddr; SymI->getAddress(SymAddr);
 
     any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
@@ -89,10 +92,11 @@ public:
         symbol_iterator RSymI = Rel.getSymbol();
         uint64_t RSymAddr;
         RSymI->getAddress(RSymAddr);
-        StringRef RSymName;
-        RSymI->getName(RSymName);
+        ErrorOr<StringRef> RSymName = RSymI->getName();
+        if (std::error_code EC = RSymName.getError())
+          report_fatal_error(EC.message());
 
-        MCSymbol *RSym = Ctx.getOrCreateSymbol(RSymName);
+        MCSymbol *RSym = Ctx.getOrCreateSymbol(*RSymName);
         if (!RSym->isVariable())
           RSym->setVariableValue(MCConstantExpr::create(RSymAddr, Ctx));
 
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 773fbf4..9e801fc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -69,7 +69,7 @@ public:
                           FixedValue);
   }
 };
-} // namespace
+}
 
 static bool isFixupKindRIPRel(unsigned Kind) {
   return Kind == X86::reloc_riprel_4byte ||
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 7d262cd..bd1bc99 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -31,7 +31,7 @@ namespace {
                           bool IsCrossSection,
                           const MCAsmBackend &MAB) const override;
   };
-} // namespace
+}
 
 X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
     : MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index dc6dd66..92f42b6 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -46,7 +46,7 @@ void X86WinCOFFStreamer::FinishImpl() {
 
   MCWinCOFFStreamer::FinishImpl();
 }
-} // namespace
+}
 
 MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
                                            raw_pwrite_stream &OS,
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1e7d942..ef3318b 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -431,4 +431,4 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
   for (unsigned i = 1; i < NumElts; i++)
     Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
 }
-} // namespace llvm
+} // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 0139297..14b6943 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,6 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
 /// \brief Decode a scalar float move instruction as a shuffle mask.
 void DecodeScalarMoveMask(MVT VT, bool IsLoad,
                           SmallVectorImpl<int> &ShuffleMask);
-} // namespace llvm
+} // llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 80f4579..8403ae6 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -80,6 +80,6 @@ FunctionPass *createX86WinEHStatePass();
 /// must run after prologue/epilogue insertion and before lowering
 /// the MachineInstr to MC.
 FunctionPass *createX86ExpandPseudoPass();
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 2051401..ba33248 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -581,34 +581,6 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
   return AsmPrinter::GetCPISymbol(CPID);
 }
 
-void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
-  SmallString<128> Directive;
-  raw_svector_ostream OS(Directive);
-  StringRef Name = Sym->getName();
-  const Triple &TT = TM.getTargetTriple();
-
-  if (TT.isKnownWindowsMSVCEnvironment())
-    OS << " /EXPORT:";
-  else
-    OS << " -export:";
-
-  if ((TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) &&
-      (Name[0] == getDataLayout().getGlobalPrefix()))
-    Name = Name.drop_front();
-
-  OS << Name;
-
-  if (IsData) {
-    if (TT.isKnownWindowsMSVCEnvironment())
-      OS << ",DATA";
-    else
-      OS << ",data";
-  }
-
-  OS.flush();
-  OutStreamer->EmitBytes(Directive);
-}
-
 void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
   const Triple &TT = TM.getTargetTriple();
 
@@ -692,39 +664,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
   }
 
   if (TT.isOSBinFormatCOFF()) {
-    // Necessary for dllexport support
-    std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+    const TargetLoweringObjectFileCOFF &TLOFCOFF =
+        static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
 
-    for (const auto &Function : M)
-      if (Function.hasDLLExportStorageClass() && !Function.isDeclaration())
-        DLLExportedFns.push_back(getSymbol(&Function));
+    std::string Flags;
+    raw_string_ostream FlagsOS(Flags);
 
+    for (const auto &Function : M)
+      TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Function, *Mang);
     for (const auto &Global : M.globals())
-      if (Global.hasDLLExportStorageClass() && !Global.isDeclaration())
-        DLLExportedGlobals.push_back(getSymbol(&Global));
-
-    for (const auto &Alias : M.aliases()) {
-      if (!Alias.hasDLLExportStorageClass())
-        continue;
-
-      if (Alias.getType()->getElementType()->isFunctionTy())
-        DLLExportedFns.push_back(getSymbol(&Alias));
-      else
-        DLLExportedGlobals.push_back(getSymbol(&Alias));
-    }
+      TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Global, *Mang);
+    for (const auto &Alias : M.aliases())
+      TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Alias, *Mang);
 
-    // Output linker support code for dllexported globals on windows.
-    if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
-      const TargetLoweringObjectFileCOFF &TLOFCOFF =
-        static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+    FlagsOS.flush();
 
+    // Output collected flags.
+    if (!Flags.empty()) {
       OutStreamer->SwitchSection(TLOFCOFF.getDrectveSection());
-
-      for (auto & Symbol : DLLExportedGlobals)
-        GenerateExportDirective(Symbol, /*IsData=*/true);
-      for (auto & Symbol : DLLExportedFns)
-        GenerateExportDirective(Symbol, /*IsData=*/false);
+      OutStreamer->EmitBytes(Flags);
     }
+
+    SM.serializeToStackMapSection();
   }
 
   if (TT.isOSBinFormatELF()) {
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index acba211..7f5d127 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -30,8 +30,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
   StackMaps SM;
   FaultMaps FM;
 
-  void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
-
   // This utility class tracks the length of a stackmap instruction's 'shadow'.
   // It is used by the X86AsmPrinter to ensure that the stackmap shadow
   // invariants (i.e. no other stackmaps, patchpoints, or control flow within
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 6d6831b..031ba4b 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -78,7 +78,7 @@ private:
   typedef DenseMap<MachineInstr *, CallContext> ContextMap;
 
   bool isLegal(MachineFunction &MF);
-  
+
   bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap);
 
   void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -90,6 +90,13 @@ private:
   MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
                                    unsigned Reg);
 
+  enum InstClassification { Convert, Skip, Exit };
+
+  InstClassification classifyInstruction(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         const X86RegisterInfo &RegInfo,
+                                         DenseSet<unsigned int> &UsedRegs);
+
   const char *getPassName() const override { return "X86 Optimize Call Frame"; }
 
   const TargetInstrInfo *TII;
@@ -99,13 +106,13 @@ private:
 };
 
 char X86CallFrameOptimization::ID = 0;
-} // namespace
+}
 
 FunctionPass *llvm::createX86CallFrameOptimization() {
   return new X86CallFrameOptimization();
 }
 
-// This checks whether the transformation is legal. 
+// This checks whether the transformation is legal.
 // Also returns false in cases where it's potentially legal, but
 // we don't even want to try.
 bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
@@ -170,9 +177,8 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
   if (!OptForSize)
     return false;
 
-
   unsigned StackAlign = TFL->getStackAlignment();
-  
+
   int64_t Advantage = 0;
   for (auto CC : CallSeqMap) {
     // Call sites where no parameters are passed on the stack
@@ -205,7 +211,6 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
   return (Advantage >= 0);
 }
 
-
 bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getSubtarget().getInstrInfo();
   TFL = MF.getSubtarget().getFrameLowering();
@@ -237,6 +242,64 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
+X86CallFrameOptimization::InstClassification
+X86CallFrameOptimization::classifyInstruction(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
+  if (MI == MBB.end())
+    return Exit;
+
+  // The instructions we actually care about are movs onto the stack
+  int Opcode = MI->getOpcode();
+  if (Opcode == X86::MOV32mi || Opcode == X86::MOV32mr)
+    return Convert;
+
+  // Not all calling conventions have only stack MOVs between the stack
+  // adjust and the call.
+
+  // We want to tolerate other instructions, to cover more cases.
+  // In particular:
+  // a) PCrel calls, where we expect an additional COPY of the basereg.
+  // b) Passing frame-index addresses.
+  // c) Calling conventions that have inreg parameters. These generate
+  //    both copies and movs into registers.
+  // To avoid creating lots of special cases, allow any instruction
+  // that does not write into memory, does not def or use the stack
+  // pointer, and does not def any register that was used by a preceding
+  // push.
+  // (Reading from memory is allowed, even if referenced through a
+  // frame index, since these will get adjusted properly in PEI)
+
+  // The reason for the last condition is that the pushes can't replace
+  // the movs in place, because the order must be reversed.
+  // So if we have a MOV32mr that uses EDX, then an instruction that defs
+  // EDX, and then the call, after the transformation the push will use
+  // the modified version of EDX, and not the original one.
+  // Since we are still in SSA form at this point, we only need to
+  // make sure we don't clobber any *physical* registers that were
+  // used by an earlier mov that will become a push.
+
+  if (MI->isCall() || MI->mayStore())
+    return Exit;
+
+  for (const MachineOperand &MO : MI->operands()) {
+    if (!MO.isReg())
+      continue;
+    unsigned int Reg = MO.getReg();
+    if (!RegInfo.isPhysicalRegister(Reg))
+      continue;
+    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
+      return Exit;
+    if (MO.isDef()) {
+      for (unsigned int U : UsedRegs)
+        if (RegInfo.regsOverlap(Reg, U))
+          return Exit;
+    }
+  }
+
+  return Skip;
+}
+
 void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                                MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator I,
@@ -254,8 +317,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
 
   // How much do we adjust the stack? This puts an upper bound on
   // the number of parameters actually passed on it.
-  unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;  
-  
+  unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
+
   // A zero adjustment means no stack parameters
   if (!MaxAdjust) {
     Context.NoStackParams = true;
@@ -284,11 +347,17 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
   if (MaxAdjust > 4)
     Context.MovVector.resize(MaxAdjust, nullptr);
 
-  do {
-    int Opcode = I->getOpcode();
-    if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
-      break;
+  InstClassification Classification;
+  DenseSet<unsigned int> UsedRegs;
 
+  while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
+         Exit) {
+    if (Classification == Skip) {
+      ++I;
+      continue;
+    }
+
+    // We know the instruction is a MOV32mi/MOV32mr.
     // We only want movs of the form:
     // movl imm/r32, k(%esp)
     // If we run into something else, bail.
@@ -323,24 +392,20 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
       return;
     Context.MovVector[StackDisp] = I;
 
-    ++I;
-  } while (I != MBB.end());
-
-  // We now expect the end of the sequence - a call and a stack adjust.
-  if (I == MBB.end())
-    return;
+    for (const MachineOperand &MO : I->uses()) {
+      if (!MO.isReg())
+        continue;
+      unsigned int Reg = MO.getReg();
+      if (RegInfo.isPhysicalRegister(Reg))
+        UsedRegs.insert(Reg);
+    }
 
-  // For PCrel calls, we expect an additional COPY of the basereg.
-  // If we find one, skip it.
-  if (I->isCopy()) {
-    if (I->getOperand(1).getReg() ==
-        MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
-      ++I;
-    else
-      return;
+    ++I;
   }
 
-  if (!I->isCall())
+  // We now expect the end of the sequence. If we stopped early,
+  // or reached the end of the block without finding a call, bail.
+  if (I == MBB.end() || !I->isCall())
     return;
 
   Context.Call = I;
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index a377eb6..0eb2494 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -42,7 +42,7 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
   return false;
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
 
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3dc75d7..0264546 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
@@ -2821,7 +2822,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   bool &IsTailCall    = CLI.IsTailCall;
   bool IsVarArg       = CLI.IsVarArg;
   const Value *Callee = CLI.Callee;
-  const char *SymName = CLI.SymName;
+  MCSymbol *Symbol = CLI.Symbol;
 
   bool Is64Bit        = Subtarget->is64Bit();
   bool IsWin64        = Subtarget->isCallingConvWin64(CC);
@@ -3117,8 +3118,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     }
 
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
-    if (SymName)
-      MIB.addExternalSymbol(SymName, OpFlags);
+    if (Symbol)
+      MIB.addSym(Symbol, OpFlags);
     else
       MIB.addGlobalAddress(GV, 0, OpFlags);
   }
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 8305a04..5eb4fae 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -91,7 +91,7 @@ private:
   const X86InstrInfo *TII; // Machine instruction info.
 };
 char FixupLEAPass::ID = 0;
-} // namespace
+}
 
 MachineInstr *
 FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 6f1d8e5..40b9c8a 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -279,7 +279,7 @@ namespace {
     void setKillFlags(MachineBasicBlock &MBB) const;
   };
   char FPS::ID = 0;
-} // namespace
+}
 
 FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
 
@@ -544,7 +544,7 @@ namespace {
       return V < TE.from;
     }
   };
-} // namespace
+}
 
 #ifndef NDEBUG
 static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
@@ -1530,7 +1530,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
     if (Op.isKill())
       moveToTop(FPReg, Inst);
     else
-      duplicateToTop(FPReg, FPReg, Inst);
+      duplicateToTop(FPReg, ScratchFPReg, Inst);
 
     // Emit the call. This will pop the operand.
     BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 2858e86..c274c88 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -153,6 +153,6 @@ private:
                                            bool InEpilogue) const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index f6785e1..6b23e62 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -67,19 +67,19 @@ namespace {
     const Constant *CP;
     const BlockAddress *BlockAddr;
     const char *ES;
+    MCSymbol *MCSym;
     int JT;
     unsigned Align;    // CP alignment.
     unsigned char SymbolFlags;  // X86II::MO_*
 
     X86ISelAddressMode()
-      : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
-        Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
-        JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {
-    }
+        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
+          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
+          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}
 
     bool hasSymbolicDisplacement() const {
       return GV != nullptr || CP != nullptr || ES != nullptr ||
-             JT != -1 || BlockAddr != nullptr;
+             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
     }
 
     bool hasBaseOrIndexReg() const {
@@ -134,11 +134,16 @@ namespace {
         dbgs() << ES;
       else
         dbgs() << "nul";
+      dbgs() << " MCSym ";
+      if (MCSym)
+        dbgs() << MCSym;
+      else
+        dbgs() << "nul";
       dbgs() << " JT" << JT << " Align" << Align << '\n';
     }
 #endif
   };
-} // namespace
+}
 
 namespace {
   //===--------------------------------------------------------------------===//
@@ -258,6 +263,10 @@ namespace {
       else if (AM.ES) {
         assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
         Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
+      } else if (AM.MCSym) {
+        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
+        assert(AM.SymbolFlags == 0 && "oo");
+        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
       } else if (AM.JT != -1) {
         assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
         Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
@@ -310,7 +319,7 @@ namespace {
       return true;
     }
   };
-} // namespace
+}
 
 
 bool
@@ -604,7 +613,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) {
 bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
                                             X86ISelAddressMode &AM) {
   // Cannot combine ExternalSymbol displacements with integer offsets.
-  if (Offset != 0 && AM.ES)
+  if (Offset != 0 && (AM.ES || AM.MCSym))
     return true;
   int64_t Val = AM.Disp + Offset;
   CodeModel::Model M = TM.getCodeModel();
@@ -690,6 +699,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
     } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
       AM.ES = S->getSymbol();
       AM.SymbolFlags = S->getTargetFlags();
+    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
+      AM.MCSym = S->getMCSymbol();
     } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
       AM.JT = J->getIndex();
       AM.SymbolFlags = J->getTargetFlags();
@@ -728,6 +739,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
     } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
       AM.ES = S->getSymbol();
       AM.SymbolFlags = S->getTargetFlags();
+    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
+      AM.MCSym = S->getMCSymbol();
     } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
       AM.JT = J->getIndex();
       AM.SymbolFlags = J->getTargetFlags();
@@ -1001,7 +1014,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     // FIXME: JumpTable and ExternalSymbol address currently don't like
     // displacements.  It isn't very important, but this should be fixed for
     // consistency.
-    if (!AM.ES && AM.JT != -1) return true;
+    if (!(AM.ES || AM.MCSym) && AM.JT != -1)
+      return true;
 
     if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
       if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
@@ -1013,13 +1027,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
   default: break;
   case ISD::FRAME_ALLOC_RECOVER: {
     if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
-      if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0)))
-        if (ESNode->getOpcode() == ISD::TargetExternalSymbol) {
-          // Use the symbol and don't prefix it.
-          AM.ES = ESNode->getSymbol();
-          AM.SymbolFlags = X86II::MO_NOPREFIX;
-          return false;
-        }
+      if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
+        // Use the symbol and don't prefix it.
+        AM.MCSym = ESNode->getMCSymbol();
+        return false;
+      }
     break;
   }
   case ISD::Constant: {
@@ -1473,6 +1485,7 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
       N->getOpcode() != ISD::TargetJumpTable &&
       N->getOpcode() != ISD::TargetGlobalAddress &&
       N->getOpcode() != ISD::TargetExternalSymbol &&
+      N->getOpcode() != ISD::MCSymbol &&
       N->getOpcode() != ISD::TargetBlockAddress)
     return false;
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ce1ca20..b16bd18 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1111,7 +1111,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::CTPOP,             MVT::v8i32, Custom);
     setOperationAction(ISD::CTPOP,             MVT::v4i64, Custom);
 
-    if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
+    if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) {
       setOperationAction(ISD::FMA,             MVT::v8f32, Legal);
       setOperationAction(ISD::FMA,             MVT::v4f64, Legal);
       setOperationAction(ISD::FMA,             MVT::v4f32, Legal);
@@ -6259,42 +6259,6 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
   return true;
 }
 
-/// \brief Test whether a shuffle mask is equivalent within each 256-bit lane.
-///
-/// This checks a shuffle mask to see if it is performing the same
-/// 256-bit lane-relative shuffle in each 256-bit lane. This trivially implies
-/// that it is also not lane-crossing. It may however involve a blend from the
-/// same lane of a second vector.
-///
-/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
-/// non-trivial to compute in the face of undef lanes. The representation is
-/// *not* suitable for use with existing 256-bit shuffles as it will contain
-/// entries from both V1 and V2 inputs to the wider mask.
-static bool
-is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
-                                SmallVectorImpl<int> &RepeatedMask) {
-  int LaneSize = 256 / VT.getScalarSizeInBits();
-  RepeatedMask.resize(LaneSize, -1);
-  int Size = Mask.size();
-  for (int i = 0; i < Size; ++i) {
-    if (Mask[i] < 0)
-      continue;
-    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
-      // This entry crosses lanes, so there is no way to model this shuffle.
-      return false;
-
-    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
-    if (RepeatedMask[i % LaneSize] == -1)
-      // This is the first non-undef entry in this slot of a 256-bit lane.
-      RepeatedMask[i % LaneSize] =
-          Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
-    else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
-      // Found a mismatch with the repeated mask.
-      return false;
-  }
-  return true;
-}
-
 /// \brief Checks whether a shuffle mask is equivalent to an explicit list of
 /// arguments.
 ///
@@ -6354,22 +6318,6 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
   return DAG.getConstant(Imm, DL, MVT::i8);
 }
 
-/// \brief Get a 8-bit shuffle, 1 bit per lane, immediate for a mask.
-///
-/// This helper function produces an 8-bit shuffle immediate corresponding to
-/// the ubiquitous shuffle encoding scheme used in x86 instructions for
-/// shuffling 8 lanes.
-static SDValue get1bitLaneShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
-                                             SelectionDAG &DAG) {
-  assert(Mask.size() <= 8 &&
-         "Up to 8 elts may be in Imm8 1-bit lane shuffle mask");
-  unsigned Imm = 0;
-  for (unsigned i = 0; i < Mask.size(); ++i)
-    if (Mask[i] >= 0)
-      Imm |= (Mask[i] % 2) << i;
-  return DAG.getConstant(Imm, DL, MVT::i8);
-}
-
 /// \brief Try to emit a blend instruction for a shuffle using bit math.
 ///
 /// This is used as a fallback approach when first class blend instructions are
@@ -9385,30 +9333,6 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
                      DAG.getConstant(PermMask, DL, MVT::i8));
 }
 
-/// \brief Handle lowering 4-lane 128-bit shuffles.
-static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
-                                        SDValue V2, ArrayRef<int> WidenedMask,
-                                        SelectionDAG &DAG) {
-
-  assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
-  // form a 128-bit permutation.
-  // convert the 64-bit shuffle mask selection values into 128-bit selection
-  // bits defined by a vshuf64x2 instruction's immediate control byte.
-  unsigned PermMask = 0, Imm = 0;
-
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    if(WidenedMask[i] == SM_SentinelZero)
-      return SDValue();
-
-    // use first element in place of undef musk
-    Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
-    PermMask |= (Imm % 4) << (i * 2);
-  }
-
-  return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
-                     DAG.getConstant(PermMask, DL, MVT::i8));
-}
-
 /// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
 /// shuffling each lane.
 ///
@@ -10144,105 +10068,86 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   }
 }
 
-static SDValue lowerVectorShuffleWithVALIGN(SDLoc DL, MVT VT,
-                                            ArrayRef<int> Mask, SDValue V1,
-                                            SDValue V2, SelectionDAG &DAG) {
-
-  assert(VT.getScalarSizeInBits() >= 32 && "Unexpected data type for VALIGN");
-  // VALIGN pattern 2, 3, 4, 5, .. (sequential, shifted right)
-  int AlignVal = -1;
-  for (int i = 0; i < (signed)VT.getVectorNumElements(); ++i) {
-    if (Mask[i] < 0)
-      continue;
-    if (Mask[i] < i)
-      return SDValue();
-    if (AlignVal == -1)
-      AlignVal = Mask[i] - i;
-    else if (Mask[i] - i != AlignVal)
-      return SDValue();
-  }
-  // Vector source operands should be swapped
-  return DAG.getNode(X86ISD::VALIGN, DL, VT, V2, V1,
-                     DAG.getConstant(AlignVal, DL, MVT::i8));
-}
+/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
+static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+                                       const X86Subtarget *Subtarget,
+                                       SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+  assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> Mask = SVOp->getMask();
+  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
-static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
-                                           ArrayRef<int> Mask, SDValue V1,
-                                           SDValue V2, SelectionDAG &DAG) {
+  // X86 has dedicated unpack instructions that can handle specific blend
+  // operations: UNPCKH and UNPCKL.
+  if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
+  if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
 
-  assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
+  // FIXME: Implement direct support for this type!
+  return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+}
 
-  MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
-  MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
+static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+                                       const X86Subtarget *Subtarget,
+                                       SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+  assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> Mask = SVOp->getMask();
+  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
 
-  SmallVector<SDValue, 32>  VPermMask;
-  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
-    VPermMask.push_back(Mask[i] < 0 ? DAG.getUNDEF(MaskEltVT) :
-                        DAG.getConstant(Mask[i], DL,MaskEltVT));
-  SDValue MaskNode = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecVT,
-                                 VPermMask);
-  if (isSingleInputShuffleMask(Mask))
-    return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+  // Use dedicated unpack instructions for masks that match their pattern.
+  if (isShuffleEquivalent(V1, V2, Mask,
+                          {// First 128-bit lane.
+                           0, 16, 1, 17, 4, 20, 5, 21,
+                           // Second 128-bit lane.
+                           8, 24, 9, 25, 12, 28, 13, 29}))
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
+  if (isShuffleEquivalent(V1, V2, Mask,
+                          {// First 128-bit lane.
+                           2, 18, 3, 19, 6, 22, 7, 23,
+                           // Second 128-bit lane.
+                           10, 26, 11, 27, 14, 30, 15, 31}))
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
 
-  return DAG.getNode(X86ISD::VPERMV3, DL, VT, MaskNode, V1, V2);
+  // FIXME: Implement direct support for this type!
+  return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
 }
 
-
-/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
-static SDValue lowerV8X64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
+static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc DL(Op);
-  MVT VT = Op.getSimpleValueType();
-  assert((V1.getSimpleValueType() == MVT::v8f64 ||
-          V1.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
-  assert((V2.getSimpleValueType() == MVT::v8f64 ||
-          V2.getSimpleValueType() == MVT::v8i64) && "Bad operand type!");
+  assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
+  assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
-  SmallVector<int, 4> WidenedMask;
-  if (canWidenShuffleElements(Mask, WidenedMask))
-    if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
-      return Op;
   // X86 has dedicated unpack instructions that can handle specific blend
   // operations: UNPCKH and UNPCKL.
   if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
   if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
-
-  if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
-    return Op;
-
-  if (SDValue Op = lowerVectorShuffleWithSHUFPD(DL, VT, Mask, V1, V2, DAG))
-    return Op;
-
-  // PERMILPD instruction - mask 0/1, 0/1, 2/3, 2/3, 4/5, 4/5, 6/7, 6/7
-  if (isSingleInputShuffleMask(Mask)) {
-    if (!is128BitLaneCrossingShuffleMask(VT, Mask))
-      return DAG.getNode(X86ISD::VPERMILPI, DL, VT, V1,
-                         get1bitLaneShuffleImm8ForMask(Mask, DL, DAG));
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
 
-    SmallVector<int, 4> RepeatedMask;
-    if (is256BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
-      return DAG.getNode(X86ISD::VPERMI, DL, VT, V1,
-                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
-  }
-  return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
+  // FIXME: Implement direct support for this type!
+  return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
 }
 
 /// \brief Handle lowering of 16-lane 32-bit integer shuffles.
-static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
-  MVT VT = Op.getSimpleValueType();
   SDLoc DL(Op);
-  assert((V1.getSimpleValueType() == MVT::v16i32 ||
-          V1.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
-  assert((V2.getSimpleValueType() == MVT::v16i32 ||
-          V2.getSimpleValueType() == MVT::v16f32) && "Bad operand type!");
+  assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
+  assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -10253,39 +10158,16 @@ static SDValue lowerV16X32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                            0, 16, 1, 17, 4, 20, 5, 21,
                            // Second 128-bit lane.
                            8, 24, 9, 25, 12, 28, 13, 29}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
   if (isShuffleEquivalent(V1, V2, Mask,
                           {// First 128-bit lane.
                            2, 18, 3, 19, 6, 22, 7, 23,
                            // Second 128-bit lane.
                            10, 26, 11, 27, 14, 30, 15, 31}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
+    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
 
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
-                                         12, 12, 14, 14}))
-    return DAG.getNode(X86ISD::MOVSLDUP, DL, VT, V1);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11,
-                                         13, 13, 15, 15}))
-    return DAG.getNode(X86ISD::MOVSHDUP, DL, VT, V1);
-
-  SmallVector<int, 4> RepeatedMask;
-  if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) {
-    if (isSingleInputShuffleMask(Mask)) {
-      unsigned Opc = VT.isInteger() ? X86ISD::PSHUFD : X86ISD::VPERMILPI;
-      return DAG.getNode(Opc, DL, VT, V1,
-                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
-    }
-
-    for (int i = 0; i < 4; ++i)
-      if (RepeatedMask[i] >= 16)
-        RepeatedMask[i] -= 12;
-     return lowerVectorShuffleWithSHUFPS(DL, VT, RepeatedMask, V1, V2, DAG);
-  }
-
-  if (SDValue Op = lowerVectorShuffleWithVALIGN(DL, VT, Mask, V1, V2, DAG))
-    return Op;
-
-  return lowerVectorShuffleWithPERMV(DL, VT, Mask, V1, V2, DAG);
+  // FIXME: Implement direct support for this type!
+  return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
 }
 
 /// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10345,11 +10227,13 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   // the requisite ISA extensions for that element type are available.
   switch (VT.SimpleTy) {
   case MVT::v8f64:
-  case MVT::v8i64:
-    return lowerV8X64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+    return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
   case MVT::v16f32:
+    return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+  case MVT::v8i64:
+    return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
   case MVT::v16i32:
-    return lowerV16X32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+    return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
   case MVT::v32i16:
     if (Subtarget->hasBWI())
       return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
@@ -10759,11 +10643,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
 
   assert(VecVT.is128BitVector() && "Unexpected vector length");
 
-  if (Subtarget->hasSSE41()) {
-    SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
-    if (Res.getNode())
+  if (Subtarget->hasSSE41())
+    if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
       return Res;
-  }
 
   MVT VT = Op.getSimpleValueType();
   // TODO: handle v16i8.
@@ -12253,11 +12135,9 @@ static  SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
 
 static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
                                SelectionDAG &DAG) {
-  if (Subtarget->hasFp256()) {
-    SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
-    if (Res.getNode())
+  if (Subtarget->hasFp256())
+    if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
       return Res;
-  }
 
   return SDValue();
 }
@@ -12272,11 +12152,9 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
   if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
     return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
 
-  if (Subtarget->hasFp256()) {
-    SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
-    if (Res.getNode())
+  if (Subtarget->hasFp256())
+    if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget))
       return Res;
-  }
 
   assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
          VT.getVectorNumElements() != SVT.getVectorNumElements());
@@ -15117,6 +14995,54 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
     return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
 }
 
+/// When the 32-bit MSVC runtime transfers control to us, either to an outlined
+/// function or when returning to a parent frame after catching an exception, we
+/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
+/// Here's the math:
+///   RegNodeBase = EntryEBP - RegNodeSize
+///   ParentFP = RegNodeBase - RegNodeFrameOffset
+/// Subtracting RegNodeSize takes us to the offset of the registration node, and
+/// subtracting the offset (negative on x86) takes us back to the parent FP.
+static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
+                                   SDValue EntryEBP) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc dl;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MVT PtrVT = TLI.getPointerTy();
+
+  // It's possible that the parent function no longer has a personality function
+  // if the exceptional code was optimized away, in which case we just return
+  // the incoming EBP.
+  if (!Fn->hasPersonalityFn())
+    return EntryEBP;
+
+  // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
+  // WinEHStatePass for the full struct definition.
+  int RegNodeSize;
+  switch (classifyEHPersonality(Fn->getPersonalityFn())) {
+  default:
+    report_fatal_error("can only recover FP for MSVC EH personality functions");
+  case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break;
+  case EHPersonality::MSVC_CXX: RegNodeSize = 16; break;
+  }
+
+  // Get an MCSymbol that will ultimately resolve to the frame offset of the EH
+  // registration.
+  MCSymbol *OffsetSym =
+      MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
+          GlobalValue::getRealLinkageName(Fn->getName()));
+  SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
+  SDValue RegNodeFrameOffset =
+      DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal);
+
+  // RegNodeBase = EntryEBP - RegNodeSize
+  // ParentFP = RegNodeBase - RegNodeFrameOffset
+  SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
+                                    DAG.getConstant(RegNodeSize, dl, PtrVT));
+  return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset);
+}
+
 static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
   SDLoc dl(Op);
@@ -15206,6 +15132,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                               Src1,Src2),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case INTR_TYPE_2OP_MASK_RM: {
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue PassThru = Op.getOperand(3);
+      SDValue Mask = Op.getOperand(4);
+      // We specify 2 possible modes for intrinsics, with/without rounding modes.
+      // First, we check if the intrinsic have rounding mode (6 operands),
+      // if not, we set rounding mode to "current".
+      SDValue Rnd;
+      if (Op.getNumOperands() == 6)
+        Rnd = Op.getOperand(5);
+      else 
+        Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+                                              Src1, Src2, Rnd),
+                                  Mask, PassThru, Subtarget, DAG);
+    }
     case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
@@ -15230,11 +15173,26 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                               Src1, Src2, Src3),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case VPERM_3OP_MASKZ: 
+    case VPERM_3OP_MASK:
+    case FMA_OP_MASK3:
+    case FMA_OP_MASKZ:
     case FMA_OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
       SDValue Mask = Op.getOperand(4);
+      EVT VT = Op.getValueType();
+      SDValue PassThru = SDValue();
+
+      // set PassThru element
+      if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ)
+        PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+      else if (IntrData->Type == FMA_OP_MASK3)
+        PassThru = Src3;
+      else
+        PassThru = Src1;
+
       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
       // (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15246,12 +15204,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
           return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
                                                   dl, Op.getValueType(),
                                                   Src1, Src2, Src3, Rnd),
-                                      Mask, Src1, Subtarget, DAG);
+                                      Mask, PassThru, Subtarget, DAG);
       }
       return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
                                               dl, Op.getValueType(),
                                               Src1, Src2, Src3),
-                                  Mask, Src1, Subtarget, DAG);
+                                  Mask, PassThru, Subtarget, DAG);
     }
     case CMP_MASK:
     case CMP_MASK_CC: {
@@ -15330,18 +15288,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
       SDValue PassThru = Op.getOperand(2);
       if (isAllOnes(Mask)) // return data as is
         return Op.getOperand(1);
-      EVT VT = Op.getValueType();
-      EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                    VT.getVectorNumElements());
-      EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                       Mask.getValueType().getSizeInBits());
-      SDLoc dl(Op);
-      SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
-                                  DAG.getBitcast(BitcastVT, Mask),
-                                  DAG.getIntPtrConstant(0, dl));
 
-      return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
-                         PassThru);
+      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+                                              DataToCompress),
+                                  Mask, PassThru, Subtarget, DAG);
     }
     case BLEND: {
       SDValue Mask = Op.getOperand(3);
@@ -15532,15 +15482,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
     auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
     MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
         GlobalValue::getRealLinkageName(Fn->getName()));
-    StringRef Name = LSDASym->getName();
-    assert(Name.data()[Name.size()] == '\0' && "not null terminated");
 
     // Generate a simple absolute symbol reference. This intrinsic is only
     // supported on 32-bit Windows, which isn't PIC.
-    SDValue Result =
-        DAG.getTargetExternalSymbol(Name.data(), VT, X86II::MO_NOPREFIX);
+    SDValue Result = DAG.getMCSymbol(LSDASym, VT);
     return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
   }
+
+  case Intrinsic::x86_seh_recoverfp: {
+    SDValue FnOp = Op.getOperand(1);
+    SDValue IncomingFPOp = Op.getOperand(2);
+    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
+    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
+    if (!Fn)
+      report_fatal_error(
+          "llvm.x86.seh.recoverfp must take a function as the first argument");
+    return recoverFramePointer(DAG, Fn, IncomingFPOp);
+  }
   }
 }
 
@@ -15550,7 +15508,12 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
                               const X86Subtarget * Subtarget) {
   SDLoc dl(Op);
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
-  assert(C && "Invalid scale type");
+  if (!C)
+    llvm_unreachable("Invalid scale type");
+  unsigned ScaleVal = C->getZExtValue();
+  if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
+    llvm_unreachable("Valid scale values are 1, 2, 4, 8");
+
   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
   EVT MaskVT = MVT::getVectorVT(MVT::i1,
                              Index.getSimpleValueType().getVectorNumElements());
@@ -15558,8 +15521,16 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
   if (MaskC)
     MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
-  else
-    MaskInReg = DAG.getBitcast(MaskVT, Mask);
+  else {
+    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                     Mask.getValueType().getSizeInBits());
+
+    // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
+    // are extracted by EXTRACT_SUBVECTOR.
+    MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+                            DAG.getBitcast(BitcastVT, Mask),
+                            DAG.getIntPtrConstant(0, dl));
+  }
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
   SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
   SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15576,7 +15547,12 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
                                SDValue Index, SDValue ScaleOp, SDValue Chain) {
   SDLoc dl(Op);
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
-  assert(C && "Invalid scale type");
+  if (!C)
+    llvm_unreachable("Invalid scale type");
+  unsigned ScaleVal = C->getZExtValue();
+  if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
+    llvm_unreachable("Valid scale values are 1, 2, 4, 8");
+
   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
   SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
   SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -15586,8 +15562,16 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
   if (MaskC)
     MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
-  else
-    MaskInReg = DAG.getBitcast(MaskVT, Mask);
+  else {
+    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                     Mask.getValueType().getSizeInBits());
+
+    // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
+    // are extracted by EXTRACT_SUBVECTOR.
+    MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+                            DAG.getBitcast(BitcastVT, Mask),
+                            DAG.getIntPtrConstant(0, dl));
+  }
   SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
   SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
   SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
@@ -15725,37 +15709,38 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
   return DAG.getMergeValues(Results, DL);
 }
 
-static SDValue LowerEXCEPTIONINFO(SDValue Op, const X86Subtarget *Subtarget,
-                                  SelectionDAG &DAG) {
+static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
+                                    SelectionDAG &DAG) {
   MachineFunction &MF = DAG.getMachineFunction();
   SDLoc dl(Op);
-  SDValue FnOp = Op.getOperand(2);
-  SDValue FPOp = Op.getOperand(3);
+  SDValue Chain = Op.getOperand(0);
 
-  // Compute the symbol for the parent EH registration. We know it'll get
-  // emitted later.
-  auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(FnOp)->getGlobal());
-  MCSymbol *ParentFrameSym =
-      MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
-          GlobalValue::getRealLinkageName(Fn->getName()));
-  StringRef Name = ParentFrameSym->getName();
-  assert(Name.data()[Name.size()] == '\0' && "not null terminated");
-
-  // Create a TargetExternalSymbol for the label to avoid any target lowering
-  // that would make this PC relative.
-  MVT PtrVT = Op.getSimpleValueType();
-  SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
-  SDValue OffsetVal =
-      DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSym);
-
-  // Add the offset to the FP.
-  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, FPOp, OffsetVal);
-
-  // Load the second field of the struct, which is 4 bytes in. See
-  // WinEHStatePass for more info.
-  Add = DAG.getNode(ISD::ADD, dl, PtrVT, Add, DAG.getConstant(4, dl, PtrVT));
-  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Add, MachinePointerInfo(),
-                     false, false, false, 0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MVT VT = TLI.getPointerTy();
+
+  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+  unsigned FrameReg =
+      RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
+  unsigned SPReg = RegInfo->getStackRegister();
+
+  // Get incoming EBP.
+  SDValue IncomingEBP =
+      DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
+  // Load [EBP-24] into SP.
+  SDValue SPAddr =
+      DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT));
+  SDValue NewSP =
+      DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false,
+                  false, VT.getScalarSizeInBits() / 8);
+  Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
+
+  // FIXME: Restore the base pointer in case of stack realignment!
+
+  // Adjust EBP to point back to the original frame position.
+  SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP);
+  Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
+  return Chain;
 }
 
 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
@@ -15764,8 +15749,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
 
   const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
   if (!IntrData) {
-    if (IntNo == Intrinsic::x86_seh_exceptioninfo)
-      return LowerEXCEPTIONINFO(Op, Subtarget, DAG);
+    if (IntNo == llvm::Intrinsic::x86_seh_restoreframe)
+      return LowerSEHRESTOREFRAME(Op, Subtarget, DAG);
     return SDValue();
   }
 
@@ -15884,16 +15869,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
                           MachinePointerInfo(), false, false,
                           VT.getScalarSizeInBits()/8);
 
-    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                  VT.getVectorNumElements());
-    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                     Mask.getValueType().getSizeInBits());
-    SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
-                                DAG.getBitcast(BitcastVT, Mask),
-                                DAG.getIntPtrConstant(0, dl));
-
-    SDValue Compressed =  DAG.getNode(IntrData->Opc0, dl, VT, VMask,
-                                      DataToCompress, DAG.getUNDEF(VT));
+    SDValue Compressed =
+      getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
+                           Mask, DAG.getUNDEF(VT), Subtarget, DAG);
     return DAG.getStore(Chain, dl, Compressed, Addr,
                         MachinePointerInfo(), false, false,
                         VT.getScalarSizeInBits()/8);
@@ -15901,7 +15879,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
   case EXPAND_FROM_MEM: {
     SDLoc dl(Op);
     SDValue Mask = Op.getOperand(4);
-    SDValue PathThru = Op.getOperand(3);
+    SDValue PassThru = Op.getOperand(3);
     SDValue Addr = Op.getOperand(2);
     SDValue Chain = Op.getOperand(0);
     EVT VT = Op.getValueType();
@@ -15909,21 +15887,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
     if (isAllOnes(Mask)) // return just a load
       return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
                          false, VT.getScalarSizeInBits()/8);
-    EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                  VT.getVectorNumElements());
-    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
-                                     Mask.getValueType().getSizeInBits());
-    SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
-                                DAG.getBitcast(BitcastVT, Mask),
-                                DAG.getIntPtrConstant(0, dl));
 
     SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
                                        false, false, false,
                                        VT.getScalarSizeInBits()/8);
 
     SDValue Results[] = {
-        DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
-        Chain};
+      getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand),
+                           Mask, PassThru, Subtarget, DAG), Chain};
     return DAG.getMergeValues(Results, dl);
   }
   }
@@ -18476,6 +18447,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::UMIN:               return "X86ISD::UMIN";
   case X86ISD::SMAX:               return "X86ISD::SMAX";
   case X86ISD::SMIN:               return "X86ISD::SMIN";
+  case X86ISD::ABS:                return "X86ISD::ABS";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMAX_RND:           return "X86ISD::FMAX_RND";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
@@ -18618,9 +18590,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FDIV_RND:           return "X86ISD::FDIV_RND";
   case X86ISD::FSQRT_RND:          return "X86ISD::FSQRT_RND";
   case X86ISD::FGETEXP_RND:        return "X86ISD::FGETEXP_RND";
+  case X86ISD::SCALEF:             return "X86ISD::SCALEF";
   case X86ISD::ADDS:               return "X86ISD::ADDS";
   case X86ISD::SUBS:               return "X86ISD::SUBS";
-  case X86ISD::AVG:               return "X86ISD::AVG";
+  case X86ISD::AVG:                return "X86ISD::AVG";
   case X86ISD::SINT_TO_FP_RND:     return "X86ISD::SINT_TO_FP_RND";
   case X86ISD::UINT_TO_FP_RND:     return "X86ISD::UINT_TO_FP_RND";
   }
@@ -18777,7 +18750,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
 
 bool
 X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
-  if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+  if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
     return false;
 
   VT = VT.getScalarType();
@@ -19962,6 +19935,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
 // Replace 213-type (isel default) FMA3 instructions with 231-type for
 // accumulator loops. Writing back to the accumulator allows the coalescer
 // to remove extra copies in the loop.
+// FIXME: Do this on AVX512.  We don't support 231 variants yet (PR23937).
 MachineBasicBlock *
 X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
                                  MachineBasicBlock *MBB) const {
@@ -21302,8 +21276,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
     Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
 
-  SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
-  if (LD.getNode())
+  if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
     return LD;
 
   if (isTargetShuffle(N->getOpcode())) {
@@ -21451,8 +21424,7 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
 /// use 64-bit extracts and shifts.
 static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                          TargetLowering::DAGCombinerInfo &DCI) {
-  SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
-  if (NewOp.getNode())
+  if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
     return NewOp;
 
   SDValue InputVector = N->getOperand(0);
@@ -22895,16 +22867,14 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget *Subtarget) {
-  if (N->getOpcode() == ISD::SHL) {
-    SDValue V = PerformSHLCombine(N, DAG);
-    if (V.getNode()) return V;
-  }
+  if (N->getOpcode() == ISD::SHL)
+    if (SDValue V = PerformSHLCombine(N, DAG))
+      return V;
 
-  if (N->getOpcode() != ISD::SRA) {
-    // Try to fold this logical shift into a zero vector.
-    SDValue V = performShiftToAllZeros(N, DAG, Subtarget);
-    if (V.getNode()) return V;
-  }
+  // Try to fold this logical shift into a zero vector.
+  if (N->getOpcode() != ISD::SRA)
+    if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
+      return V;
 
   return SDValue();
 }
@@ -23284,8 +23254,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
-  if (R.getNode())
+  if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
     return R;
 
   SDValue N0 = N->getOperand(0);
@@ -23480,11 +23449,9 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  if (Subtarget->hasCMov()) {
-    SDValue RV = performIntegerAbsCombine(N, DAG);
-    if (RV.getNode())
+  if (Subtarget->hasCMov())
+    if (SDValue RV = performIntegerAbsCombine(N, DAG))
       return RV;
-  }
 
   return SDValue();
 }
@@ -24266,23 +24233,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
   }
 
-  if (VT.isVector()) {
-    auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
+  if (VT.isVector() && Subtarget->hasSSE2()) {
+    auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
       EVT InVT = N.getValueType();
       EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
-                                   128 / InVT.getScalarSizeInBits());
-      SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
+                                   Size / InVT.getScalarSizeInBits());
+      SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
                                     DAG.getUNDEF(InVT));
       Opnds[0] = N;
       return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
     };
 
+    // If target-size is less than 128-bits, extend to a type that would extend
+    // to 128 bits, extend that and extract the original target vector.
+    if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) &&
+        (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+        (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+      unsigned Scale = 128 / VT.getSizeInBits();
+      EVT ExVT =
+          EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
+      SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
+      SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex);
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
+                         DAG.getIntPtrConstant(0, DL));
+    }
+
     // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
     // which ensures lowering to X86ISD::VSEXT (pmovsx*).
     if (VT.getSizeInBits() == 128 &&
         (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
         (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
-      SDValue ExOp = ExtendToVec128(DL, N0);
+      SDValue ExOp = ExtendVecSize(DL, N0, 128);
       return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
     }
 
@@ -24301,7 +24282,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
            ++i, Offset += NumSubElts) {
         SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
                                      DAG.getIntPtrConstant(Offset, DL));
-        SrcVec = ExtendToVec128(DL, SrcVec);
+        SrcVec = ExtendVecSize(DL, SrcVec, 128);
         SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
         Opnds.push_back(SrcVec);
       }
@@ -24312,11 +24293,9 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
   if (!Subtarget->hasFp256())
     return SDValue();
 
-  if (VT.isVector() && VT.getSizeInBits() == 256) {
-    SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
-    if (R.getNode())
+  if (VT.isVector() && VT.getSizeInBits() == 256)
+    if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
       return R;
-  }
 
   return SDValue();
 }
@@ -24332,7 +24311,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
 
   EVT ScalarVT = VT.getScalarType();
   if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
-      (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
+      (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
+       !Subtarget->hasAVX512()))
     return SDValue();
 
   SDValue A = N->getOperand(0);
@@ -24398,11 +24378,10 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
                          DAG.getConstant(1, dl, VT));
     }
   }
-  if (VT.is256BitVector()) {
-    SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
-    if (R.getNode())
+
+  if (VT.is256BitVector())
+    if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
       return R;
-  }
 
   // (i8,i32 zext (udivrem (i8 x, i8 y)) ->
   // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
@@ -24606,10 +24585,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
   if (CC == X86::COND_B)
     return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
 
-  SDValue Flags;
-
-  Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
-  if (Flags.getNode()) {
+  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
     SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
     return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
   }
@@ -24628,10 +24604,7 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
   SDValue EFLAGS = N->getOperand(3);
   X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
 
-  SDValue Flags;
-
-  Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
-  if (Flags.getNode()) {
+  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
     SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
     return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
                        Flags);
@@ -24695,16 +24668,18 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
 
   // Now move on to more general possibilities.
   SDValue Op0 = N->getOperand(0);
-  EVT InVT = Op0->getValueType(0);
+  EVT VT = N->getValueType(0);
+  EVT InVT = Op0.getValueType();
+  EVT InSVT = InVT.getScalarType();
 
   // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
   // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
-  if (InVT == MVT::v8i8 || InVT == MVT::v4i8 ||
-      InVT == MVT::v8i16 || InVT == MVT::v4i16) {
+  if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
     SDLoc dl(N);
-    MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements());
+    EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+                                 InVT.getVectorNumElements());
     SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
-    return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P);
+    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
   }
 
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
@@ -24714,10 +24689,10 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
     EVT LdVT = Ld->getValueType(0);
 
     // This transformation is not supported if the result type is f16
-    if (N->getValueType(0) == MVT::f16)
+    if (VT == MVT::f16)
       return SDValue();
 
-    if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
+    if (!Ld->isVolatile() && !VT.isVector() &&
         ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
         !Subtarget->is64Bit() && LdVT == MVT::i64) {
       SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
@@ -25683,75 +25658,40 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
   // Otherwise, check to see if this is a register class of the wrong value
   // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
   // turn into {ax},{dx}.
-  if (Res.second->hasType(VT))
+  // MVT::Other is used to specify clobber names.
+  if (Res.second->hasType(VT) || VT == MVT::Other)
     return Res;   // Correct type already, nothing to do.
 
-  // All of the single-register GCC register classes map their values onto
-  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
-  // really want an 8-bit or 32-bit register, map to the appropriate register
-  // class and return the appropriate register.
-  if (Res.second == &X86::GR16RegClass) {
-    if (VT == MVT::i8 || VT == MVT::i1) {
-      unsigned DestReg = 0;
-      switch (Res.first) {
-      default: break;
-      case X86::AX: DestReg = X86::AL; break;
-      case X86::DX: DestReg = X86::DL; break;
-      case X86::CX: DestReg = X86::CL; break;
-      case X86::BX: DestReg = X86::BL; break;
-      }
-      if (DestReg) {
-        Res.first = DestReg;
-        Res.second = &X86::GR8RegClass;
-      }
-    } else if (VT == MVT::i32 || VT == MVT::f32) {
-      unsigned DestReg = 0;
-      switch (Res.first) {
-      default: break;
-      case X86::AX: DestReg = X86::EAX; break;
-      case X86::DX: DestReg = X86::EDX; break;
-      case X86::CX: DestReg = X86::ECX; break;
-      case X86::BX: DestReg = X86::EBX; break;
-      case X86::SI: DestReg = X86::ESI; break;
-      case X86::DI: DestReg = X86::EDI; break;
-      case X86::BP: DestReg = X86::EBP; break;
-      case X86::SP: DestReg = X86::ESP; break;
-      }
-      if (DestReg) {
-        Res.first = DestReg;
-        Res.second = &X86::GR32RegClass;
-      }
-    } else if (VT == MVT::i64 || VT == MVT::f64) {
-      unsigned DestReg = 0;
-      switch (Res.first) {
-      default: break;
-      case X86::AX: DestReg = X86::RAX; break;
-      case X86::DX: DestReg = X86::RDX; break;
-      case X86::CX: DestReg = X86::RCX; break;
-      case X86::BX: DestReg = X86::RBX; break;
-      case X86::SI: DestReg = X86::RSI; break;
-      case X86::DI: DestReg = X86::RDI; break;
-      case X86::BP: DestReg = X86::RBP; break;
-      case X86::SP: DestReg = X86::RSP; break;
-      }
-      if (DestReg) {
-        Res.first = DestReg;
-        Res.second = &X86::GR64RegClass;
-      }
-    } else if (VT != MVT::Other) {
-      // Type mismatch and not a clobber: Return an error;
+  // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should
+  // return "eax". This should even work for things like getting 64bit integer
+  // registers when given an f64 type.
+  const TargetRegisterClass *Class = Res.second;
+  if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass ||
+      Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) {
+    unsigned Size = VT.getSizeInBits();
+    MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8
+                                  : Size == 16 ? MVT::i16
+                                  : Size == 32 ? MVT::i32
+                                  : Size == 64 ? MVT::i64
+                                  : MVT::Other;
+    unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy);
+    if (DestReg > 0) {
+      Res.first = DestReg;
+      Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass
+                 : SimpleTy == MVT::i16 ? &X86::GR16RegClass
+                 : SimpleTy == MVT::i32 ? &X86::GR32RegClass
+                 : &X86::GR64RegClass;
+      assert(Res.second->contains(Res.first) && "Register in register class");
+    } else {
+      // No register found/type mismatch.
       Res.first = 0;
       Res.second = nullptr;
     }
-  } else if (Res.second == &X86::FR32RegClass ||
-             Res.second == &X86::FR64RegClass ||
-             Res.second == &X86::VR128RegClass ||
-             Res.second == &X86::VR256RegClass ||
-             Res.second == &X86::FR32XRegClass ||
-             Res.second == &X86::FR64XRegClass ||
-             Res.second == &X86::VR128XRegClass ||
-             Res.second == &X86::VR256XRegClass ||
-             Res.second == &X86::VR512RegClass) {
+  } else if (Class == &X86::FR32RegClass || Class == &X86::FR64RegClass ||
+             Class == &X86::VR128RegClass || Class == &X86::VR256RegClass ||
+             Class == &X86::FR32XRegClass || Class == &X86::FR64XRegClass ||
+             Class == &X86::VR128XRegClass || Class == &X86::VR256XRegClass ||
+             Class == &X86::VR512RegClass) {
     // Handle references to XMM physical registers that got mapped into the
     // wrong class.  This can happen with constraints like {xmm0} where the
     // target independent register mapper will just pick the first match it can
@@ -25767,15 +25707,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       Res.second = &X86::VR256RegClass;
     else if (X86::VR512RegClass.hasType(VT))
       Res.second = &X86::VR512RegClass;
-    else if (VT != MVT::Other) {
+    else {
       // Type mismatch and not a clobber: Return an error;
       Res.first = 0;
       Res.second = nullptr;
     }
-  } else if (VT != MVT::Other) {
-    // Type mismatch and not a clobber: Return an error;
-    Res.first = 0;
-    Res.second = nullptr;
   }
 
   return Res;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9c98333..1766089 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -211,7 +211,8 @@ namespace llvm {
 
       // FP vector get exponent 
       FGETEXP_RND,
-
+      // FP Scale
+      SCALEF,
       // Integer add/sub with unsigned saturation.
       ADDUS,
       SUBUS,
@@ -238,6 +239,9 @@ namespace llvm {
       /// Signed integer max and min.
       SMAX, SMIN,
 
+      // Integer absolute value
+      ABS,
+
       /// Floating point max and min.
       FMAX, FMIN,
 
@@ -516,7 +520,7 @@ namespace llvm {
       // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
       // thought as target memory ops!
     };
-  } // namespace X86ISD
+  }
 
   /// Define some predicates that are used for node matching.
   namespace X86 {
@@ -583,7 +587,7 @@ namespace llvm {
       TO_ZERO = 3,
       CUR_DIRECTION = 4
     };
-  } // namespace X86
+  }
 
   //===--------------------------------------------------------------------===//
   //  X86 Implementation of the TargetLowering interface
@@ -638,9 +642,8 @@ namespace llvm {
     /// legal as the hook is used before type legalization.
     bool isSafeMemOpType(MVT VT) const override;
 
-    /// Returns true if the target allows
-    /// unaligned memory accesses. of the specified type. Returns whether it
-    /// is "fast" by reference in the second argument.
+    /// Returns true if the target allows unaligned memory accesses of the
+    /// specified type. Returns whether it is "fast" in the last argument.
     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
                                        bool *Fast) const override;
 
@@ -1120,6 +1123,6 @@ namespace llvm {
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   }
-} // namespace llvm
+}
 
 #endif    // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index de6a835..b309b82 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -274,6 +274,16 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                           (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
 
+multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
+                                     dag Outs, dag NonTiedIns, string OpcodeStr,
+                                     string AttSrcAsm, string IntelSrcAsm,
+                                     dag RHS> :
+   AVX512_maskable_common<O, F, _, Outs,
+                          !con((ins _.RC:$src1), NonTiedIns),
+                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
+                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+                          (X86select _.KRCWM:$mask, RHS, _.RC:$src1)>;
 
 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                   dag Outs, dag Ins,
@@ -3436,7 +3446,7 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }
 
 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
-                            X86VectorVTInfo _, bit IsCommutable> {
+                            X86VectorVTInfo _> {
   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                   "$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -3446,7 +3456,7 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn
 
 
 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
-                            X86VectorVTInfo _, bit IsCommutable> {
+                            X86VectorVTInfo _> {
   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -3481,16 +3491,16 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }
 
 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
-  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
                               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
                               EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
 }
 
 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
-  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>,
+  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
                               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
-  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>,
+  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
                               EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
 }
 
@@ -3513,6 +3523,48 @@ let Predicates = [HasDQI] in {
   defm VXOR  : avx512_fp_binop_p<0x57, "vxor", X86fxor, 1>;
 }
 
+multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            X86VectorVTInfo _> {
+  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+                  "$src2, $src1", "$src1, $src2",
+                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V;
+  let mayLoad = 1 in {
+    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+                    "$src2, $src1", "$src1, $src2",
+                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V;
+    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+                     "${src2}"##_.BroadcastStr##", $src1",
+                     "$src1, ${src2}"##_.BroadcastStr,
+                     (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
+                                                (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
+                     EVEX_4V, EVEX_B;
+  }//let mayLoad = 1
+}
+
+multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>, 
+             avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
+                              EVEX_V512, EVEX_CD8<32, CD8VF>;
+  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>, 
+             avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
+                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  // Define only if AVX512VL feature is present.
+  let Predicates = [HasVLX] in {
+    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
+                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
+    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
+                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
+    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
+                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
+                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+  }
+}
+defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
+
 //===----------------------------------------------------------------------===//
 // AVX-512  VPTESTM instructions
 //===----------------------------------------------------------------------===//
@@ -3870,6 +3922,19 @@ defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                   X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                   X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
+                                  
+multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  let Predicates = [HasBWI] in
+  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
+
+  let Predicates = [HasVLX, HasBWI] in {
+  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
+  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
+  }
+}
+
+defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - MOVDDUP
 //===----------------------------------------------------------------------===//
@@ -3950,188 +4015,295 @@ let Predicates = [HasAVX512] in {
 //
 
 let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
-                           SDPatternOperator OpNode = null_frag> {
+multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _> {
   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
          AVX512FMA3Base;
 
-  let mayLoad = 1 in
-  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+  let mayLoad = 1 in {
+    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.RC:$src2, _.MemOp:$src3),
             OpcodeStr, "$src3, $src2", "$src2, $src3",
             (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
             AVX512FMA3Base; 
 
-  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.ScalarMemOp:$src3),
               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
               !strconcat("$src2, ${src3}", _.BroadcastStr ),
               (OpNode _.RC:$src1,
                _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,	
               AVX512FMA3Base, EVEX_B;
- }
-} // Constraints = "$src1 = $dst"
+  }
+}
 
-let Constraints = "$src1 = $dst" in {
-// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
-                                 X86VectorVTInfo _,
-                                 SDPatternOperator OpNode> {
-   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _> {
+  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
           AVX512FMA3Base, EVEX_B, EVEX_RC;
- }
+}
 } // Constraints = "$src1 = $dst"
 
-multiclass avx512_fma3_round_forms<bits<8> opc213, string OpcodeStr,
-                              X86VectorVTInfo VTI, SDPatternOperator OpNode> {
-  defm v213r : avx512_fma3_round_rrb<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
-                              VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                     SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+  let Predicates = [HasAVX512] in {
+    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
+                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+  }
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
+                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
+                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
+  }
 }
 
-multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
-                              string OpcodeStr, X86VectorVTInfo VTI,
-                              SDPatternOperator OpNode> {
-  defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
-                              VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
-  defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
-                              VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
+multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            SDNode OpNodeRnd > {
+    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+                                      avx512vl_f32_info>;
+    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+                                      avx512vl_f64_info>, VEX_W;
 }
 
-multiclass avx512_fma3p<bits<8> opc213, bits<8> opc231,
-                              string OpcodeStr,
-                              SDPatternOperator OpNode,
-                              SDPatternOperator OpNodeRnd> {
-let ExeDomain = SSEPackedSingle in {
-    defm NAME##PSZ      : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
-                                             v16f32_info, OpNode>,
-                          avx512_fma3_round_forms<opc213, OpcodeStr,
-                                             v16f32_info, OpNodeRnd>, EVEX_V512;
-    defm NAME##PSZ256   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
-                                             v8f32x_info, OpNode>, EVEX_V256;
-    defm NAME##PSZ128   : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
-                                             v4f32x_info, OpNode>, EVEX_V128;
+defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
+
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _> {
+  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+          (ins _.RC:$src2, _.RC:$src3),
+          OpcodeStr, "$src3, $src2", "$src2, $src3",
+          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
+         AVX512FMA3Base;
+
+  let mayLoad = 1 in {
+    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+            (ins _.RC:$src2, _.MemOp:$src3),
+            OpcodeStr, "$src3, $src2", "$src2, $src3",
+            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
+           AVX512FMA3Base;
+
+    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+           (ins _.RC:$src2, _.ScalarMemOp:$src3),
+           OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
+           "$src2, ${src3}"##_.BroadcastStr,
+           (_.VT (OpNode _.RC:$src2,
+                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+                        _.RC:$src1))>, AVX512FMA3Base, EVEX_B;
   }
-let ExeDomain = SSEPackedDouble in {
-    defm  NAME##PDZ     : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
-                                             v8f64_info, OpNode>,
-                          avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
-                                                  OpNodeRnd>, EVEX_V512, VEX_W;
-    defm  NAME##PDZ256  : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
-                                             v4f64x_info, OpNode>,
-                                             EVEX_V256, VEX_W;
-    defm  NAME##PDZ128  : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
-                                             v2f64x_info, OpNode>,
-                                             EVEX_V128, VEX_W;
+}
+
+multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _> {
+  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
+          AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                     SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+  let Predicates = [HasAVX512] in {
+    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
+                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+  }
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
+                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
+                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
   }
 }
 
-defm VFMADD    : avx512_fma3p<0xA8, 0xB8, "vfmadd", X86Fmadd, X86FmaddRnd>;
-defm VFMSUB    : avx512_fma3p<0xAA, 0xBA, "vfmsub", X86Fmsub, X86FmsubRnd>;
-defm VFMADDSUB : avx512_fma3p<0xA6, 0xB6, "vfmaddsub", X86Fmaddsub, X86FmaddsubRnd>;
-defm VFMSUBADD : avx512_fma3p<0xA7, 0xB7, "vfmsubadd", X86Fmsubadd, X86FmsubaddRnd>;
-defm VFNMADD   : avx512_fma3p<0xAC, 0xBC, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB   : avx512_fma3p<0xAE, 0xBE, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
+multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            SDNode OpNodeRnd > {
+    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+                                      avx512vl_f32_info>;
+    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+                                      avx512vl_f64_info>, VEX_W;
+}
+
+defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
 
 let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                             X86VectorVTInfo _> {
-  let mayLoad = 1 in
-  def m: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
-          (ins _.RC:$src1, _.RC:$src3, _.MemOp:$src2),
-          !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
-          [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2),
-                                                    _.RC:$src3)))]>;
-   def mb: AVX512FMA3<opc, MRMSrcMem, (outs _.RC:$dst),
-           (ins _.RC:$src1, _.RC:$src3, _.ScalarMemOp:$src2),
-           !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr,
-            ", $src3, $dst|$dst, $src3, ${src2}", _.BroadcastStr, "}"),
-           [(set _.RC:$dst,
-               (OpNode _.RC:$src1, (_.VT (X86VBroadcast
-                                            (_.ScalarLdFrag addr:$src2))),
-                                   _.RC:$src3))]>, EVEX_B;
+multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _> {
+  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+          (ins _.RC:$src3, _.RC:$src2),
+          OpcodeStr, "$src2, $src3", "$src3, $src2",
+          (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+         AVX512FMA3Base;
+
+  let mayLoad = 1 in {
+    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+            (ins _.RC:$src3, _.MemOp:$src2),
+            OpcodeStr, "$src2, $src3", "$src3, $src2",
+            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>,
+           AVX512FMA3Base;
+
+    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+           (ins _.RC:$src3, _.ScalarMemOp:$src2),
+           OpcodeStr, "${src2}"##_.BroadcastStr##", $src3",
+           "$src3, ${src2}"##_.BroadcastStr,
+           (_.VT (OpNode _.RC:$src1,
+                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+                        _.RC:$src3))>, AVX512FMA3Base, EVEX_B;
+  }
 }
-} // Constraints = "$src1 = $dst"
 
-multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _> {
+  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+          (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc),
+          OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc",
+          (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
+          AVX512FMA3Base, EVEX_B, EVEX_RC;
+}
+} // Constraints = "$src1 = $dst"
 
-let ExeDomain = SSEPackedSingle in {
-    defm NAME##PSZ      : avx512_fma3p_m132<opc, OpcodeStr##ps,
-                                             OpNode,v16f32_info>, EVEX_V512,
-                                             EVEX_CD8<32, CD8VF>;
-    defm NAME##PSZ256   : avx512_fma3p_m132<opc, OpcodeStr##ps,
-                                             OpNode, v8f32x_info>, EVEX_V256,
-                                             EVEX_CD8<32, CD8VF>;
-    defm NAME##PSZ128   : avx512_fma3p_m132<opc, OpcodeStr##ps,
-                                             OpNode, v4f32x_info>, EVEX_V128,
-                                             EVEX_CD8<32, CD8VF>;
+multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                     SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+  let Predicates = [HasAVX512] in {
+    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
+                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
+                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
   }
-let ExeDomain = SSEPackedDouble in {
-    defm  NAME##PDZ       : avx512_fma3p_m132<opc, OpcodeStr##pd,
-                                           OpNode, v8f64_info>, EVEX_V512,
-                                           VEX_W, EVEX_CD8<32, CD8VF>;
-    defm  NAME##PDZ256    : avx512_fma3p_m132<opc, OpcodeStr##pd,
-                                           OpNode, v4f64x_info>, EVEX_V256,
-                                           VEX_W, EVEX_CD8<32, CD8VF>;
-    defm  NAME##PDZ128    : avx512_fma3p_m132<opc, OpcodeStr##pd,
-                                           OpNode, v2f64x_info>, EVEX_V128,
-                                           VEX_W, EVEX_CD8<32, CD8VF>;
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
+                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
+                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
   }
 }
 
-defm VFMADD132    : avx512_fma3p_m132_f<0x98, "vfmadd132", X86Fmadd>;
-defm VFMSUB132    : avx512_fma3p_m132_f<0x9A, "vfmsub132", X86Fmsub>;
-defm VFMADDSUB132 : avx512_fma3p_m132_f<0x96, "vfmaddsub132", X86Fmaddsub>;
-defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
-defm VFNMADD132   : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
-defm VFNMSUB132   : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
+multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            SDNode OpNodeRnd > {
+    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
+                                      avx512vl_f32_info>;
+    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
+                                      avx512vl_f64_info>, VEX_W;
+}
+
+defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
+defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
+defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
+defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
 
 // Scalar FMA
 let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                 RegisterClass RC, ValueType OpVT,
-                 X86MemOperand x86memop, Operand memop,
-                 PatFrag mem_frag> {
-  let isCommutable = 1 in
-  def r     : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
-                   (ins RC:$src1, RC:$src2, RC:$src3),
-                   !strconcat(OpcodeStr,
-                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-                   [(set RC:$dst,
-                     (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                               dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
+                                                        dag RHS_r, dag RHS_m > {
+  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
+          "$src3, $src2", "$src2, $src3", RHS_VEC_r>, AVX512FMA3Base;
+
   let mayLoad = 1 in
-  def m     : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
-                   (ins RC:$src1, RC:$src2, f128mem:$src3),
-                   !strconcat(OpcodeStr,
+    defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+            (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr,
+            "$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base;
+
+  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb>,
+                                       AVX512FMA3Base, EVEX_B, EVEX_RC;
+
+  let isCodeGenOnly = 1 in {
+    def r     : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
+                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
+                     !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-                   [(set RC:$dst,
-                     (OpVT (OpNode RC:$src2, RC:$src1,
-                            (mem_frag addr:$src3))))]>;
+                     [RHS_r]>;
+    let mayLoad = 1 in
+      def m     : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
+                      (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
+                      !strconcat(OpcodeStr,
+                                 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                      [RHS_m]>;
+  }// isCodeGenOnly = 1
+}
+}// Constraints = "$src1 = $dst"
+
+multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
+         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ ,
+                                                                  string SUFF> {
+
+  defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
+                (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
+                (_.VT (OpNode _.RC:$src2, _.RC:$src1,
+                         (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))))),
+                (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3,
+                         (i32 imm:$rc))),
+                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
+                         _.FRC:$src3))),
+                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
+                         (_.ScalarLdFrag addr:$src3))))>;
+
+  defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
+                (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
+                (_.VT (OpNode _.RC:$src2,
+                       (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+                              _.RC:$src1)),
+                (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1,
+                                  (i32 imm:$rc))),
+                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
+                                          _.FRC:$src1))),
+                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
+                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
+
+  defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
+                (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)),
+                (_.VT (OpNode _.RC:$src1,
+                       (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+                              _.RC:$src2)),
+                (_.VT ( OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2,
+                         (i32 imm:$rc))),
+                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
+                         _.FRC:$src2))),
+                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
+                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
+}
+
+multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
+                             string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{
+  let Predicates = [HasAVX512] in {
+    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+                                   OpNodeRnd, f32x_info, "SS">,
+                                   EVEX_CD8<32, CD8VT1>, VEX_LIG;
+    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+                                   OpNodeRnd, f64x_info, "SD">,
+                                   EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+  }
 }
-} // Constraints = "$src1 = $dst"
 
-defm VFMADDSSZ  : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
-                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMADDSDZ  : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
-                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFMSUBSSZ  : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
-                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFMSUBSDZ  : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
-                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMADDSSZ  : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
-                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMADDSDZ  : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
-                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VFNMSUBSSZ  : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
-                      f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
-defm VFNMSUBSDZ  : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
-                      f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
+defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
 
 //===----------------------------------------------------------------------===//
 // AVX-512  Scalar convert from sign integer to float/double
@@ -5427,10 +5599,11 @@ defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
 
 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag GatherNode> {
-  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
+  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
+      ExeDomain = _.ExeDomain in
   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
             (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
-            !strconcat(OpcodeStr,
+            !strconcat(OpcodeStr#_.Suffix,
             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
             [(set _.RC:$dst, _.KRCWM:$mask_wb,
               (GatherNode  (_.VT _.RC:$src1), _.KRCWM:$mask,
@@ -5438,67 +5611,104 @@ multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
              EVEX_CD8<_.EltSize, CD8VT1>;
 }
 
-let ExeDomain = SSEPackedDouble in {
-defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
-                                 mgatherv8i32>, EVEX_V512, VEX_W;
-defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
-                                 mgatherv8i64>, EVEX_V512, VEX_W;
+multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
+                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
+                                      vy32xmem, mgatherv8i32>, EVEX_V512, VEX_W;
+  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
+                                      vz64mem,  mgatherv8i64>, EVEX_V512, VEX_W;
+let Predicates = [HasVLX] in {
+  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
+                              vx32xmem, mgatherv4i32>, EVEX_V256, VEX_W;
+  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
+                              vy64xmem, mgatherv4i64>, EVEX_V256, VEX_W;
+  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
+                              vx32xmem, mgatherv4i32>, EVEX_V128, VEX_W;
+  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+                              vx64xmem, mgatherv2i64>, EVEX_V128, VEX_W;
+}
+}
+
+multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
+                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz32mem,
+                                       mgatherv16i32>, EVEX_V512;
+  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz64mem,
+                                       mgatherv8i64>, EVEX_V512;
+let Predicates = [HasVLX] in {
+  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
+                                          vy32xmem, mgatherv8i32>, EVEX_V256;
+  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+                                          vy64xmem, mgatherv4i64>, EVEX_V256;
+  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
+                                          vx32xmem, mgatherv4i32>, EVEX_V128;
+  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
+                                          vx64xmem, mgatherv2i64>, EVEX_V128;
 }
-
-let ExeDomain = SSEPackedSingle in {
-defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
-                                 mgatherv16i32>, EVEX_V512;
-defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
-                                 mgatherv8i64>,  EVEX_V512;
 }
 
-defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info,  vy64xmem,
-                                 mgatherv8i32>, EVEX_V512, VEX_W;
-defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
-                                 mgatherv16i32>, EVEX_V512;
 
-defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info,  vz64mem,
-                                 mgatherv8i64>, EVEX_V512, VEX_W;
-defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info,  vz64mem,
-                                 mgatherv8i64>, EVEX_V512;
+defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
+               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
+
+defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
+                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
 
 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                           X86MemOperand memop, PatFrag ScatterNode> {
 
-let mayStore = 1, Constraints = "$mask = $mask_wb" in
+let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
 
   def mr  : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
             (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
-            !strconcat(OpcodeStr,
+            !strconcat(OpcodeStr#_.Suffix,
             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
             [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                      _.KRCWM:$mask,  vectoraddr:$dst))]>,
             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
 }
 
-let ExeDomain = SSEPackedDouble in {
-defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
-                                   mscatterv8i32>, EVEX_V512, VEX_W;
-defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
-                                   mscatterv8i64>, EVEX_V512, VEX_W;
+multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
+                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
+                                      vy32xmem, mscatterv8i32>, EVEX_V512, VEX_W;
+  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
+                                      vz64mem,  mscatterv8i64>, EVEX_V512, VEX_W;
+let Predicates = [HasVLX] in {
+  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
+                              vx32xmem, mscatterv4i32>, EVEX_V256, VEX_W;
+  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
+                              vy64xmem, mscatterv4i64>, EVEX_V256, VEX_W;
+  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
+                              vx32xmem, mscatterv4i32>, EVEX_V128, VEX_W;
+  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+                              vx64xmem, mscatterv2i64>, EVEX_V128, VEX_W;
+}
+}
+
+multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
+                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
+  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz32mem,
+                                       mscatterv16i32>, EVEX_V512;
+  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz64mem,
+                                       mscatterv8i64>, EVEX_V512;
+let Predicates = [HasVLX] in {
+  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
+                                          vy32xmem, mscatterv8i32>, EVEX_V256;
+  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+                                          vy64xmem, mscatterv4i64>, EVEX_V256;
+  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
+                                          vx32xmem, mscatterv4i32>, EVEX_V128;
+  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
+                                          vx64xmem, mscatterv2i64>, EVEX_V128;
 }
-
-let ExeDomain = SSEPackedSingle in {
-defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
-                                   mscatterv16i32>, EVEX_V512;
-defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
-                                   mscatterv8i64>, EVEX_V512;
 }
 
-defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
-                                   mscatterv8i32>, EVEX_V512, VEX_W;
-defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
-                                   mscatterv16i32>, EVEX_V512;
+defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
+               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
 
-defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
-                                   mscatterv8i64>, EVEX_V512, VEX_W;
-defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
-                                   mscatterv8i64>, EVEX_V512;
+defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
+                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
 
 // prefetch
 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
@@ -5599,77 +5809,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1,
 def v16i1sextv16i32  : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
 def v8i1sextv8i64  : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
 
-multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
-                        RegisterClass KRC, RegisterClass RC,
-                        X86MemOperand x86memop, X86MemOperand x86scalar_mop,
-                        string BrdcstStr> {
-  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-            !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-            []>, EVEX;
-  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
-             []>, EVEX, EVEX_K;
-  def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
-              !strconcat(OpcodeStr,
-                         "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
-              []>, EVEX, EVEX_KZ;
-  let mayLoad = 1 in {
-    def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-              (ins x86memop:$src),
-              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-              []>, EVEX;
-    def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-               (ins KRC:$mask, x86memop:$src),
-               !strconcat(OpcodeStr,
-                          "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
-               []>, EVEX, EVEX_K;
-    def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-                (ins KRC:$mask, x86memop:$src),
-                !strconcat(OpcodeStr,
-                           "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
-                []>, EVEX, EVEX_KZ;
-    def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-               (ins x86scalar_mop:$src),
-               !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                          ", $dst|$dst, ${src}", BrdcstStr, "}"),
-               []>, EVEX, EVEX_B;
-    def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-                (ins KRC:$mask, x86scalar_mop:$src),
-                !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                           ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
-                []>, EVEX, EVEX_B, EVEX_K;
-    def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
-                 (ins KRC:$mask, x86scalar_mop:$src),
-                 !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
-                            ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
-                            BrdcstStr, "}"),
-                 []>, EVEX, EVEX_B, EVEX_KZ;
-  }
-}
-
-defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
-                           i512mem, i32mem, "{1to16}">, EVEX_V512,
-                           EVEX_CD8<32, CD8VF>;
-defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
-                           i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
-                           EVEX_CD8<64, CD8VF>;
-
-def : Pat<(xor
-          (bc_v16i32 (v16i1sextv16i32)),
-          (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
-          (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
-          (bc_v8i64 (v8i1sextv8i64)),
-          (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
-          (VPABSQZrr VR512:$src)>;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
-                   (v16i32 immAllZerosV), (i16 -1))),
-          (VPABSDZrr VR512:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
-                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
-          (VPABSQZrr VR512:$src)>;
-
 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
                         RegisterClass RC, RegisterClass KRC,
                         X86MemOperand x86memop,
@@ -5868,26 +6007,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
 //===----------------------------------------------------------------------===//
 // AVX-512 - COMPRESS and EXPAND
 //
+
 multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                  string OpcodeStr> {
-  def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
-              (ins _.KRCWM:$mask, _.RC:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
-              [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
-                                      _.ImmAllZerosV)))]>, EVEX_KZ;
-
-  let Constraints = "$src0 = $dst" in
-  def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
-                    (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
-                    OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-                    [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
-                                            _.RC:$src0)))]>, EVEX_K;
+  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
+              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 
+              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
 
   let mayStore = 1 in {
+  def mr : AVX5128I<opc, MRMDestMem, (outs),
+              (ins _.MemOp:$dst, _.RC:$src),
+              OpcodeStr # "\t{$src, $dst |$dst, $src}",
+              []>, EVEX_CD8<_.EltSize, CD8VT1>;
+
   def mrk : AVX5128I<opc, MRMDestMem, (outs),
               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-              [(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)),
+              [(store (_.VT (vselect _.KRCWM:$mask, 
+                             (_.VT (X86compress  _.RC:$src)), _.ImmAllZerosV)),
                 addr:$dst)]>,
               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
   }
@@ -5915,37 +6052,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info
 // expand
 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                  string OpcodeStr> {
-  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
-              (ins _.KRCWM:$mask, _.RC:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
-              [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
-                                      _.ImmAllZerosV)))]>, EVEX_KZ;
-
-  let Constraints = "$src0 = $dst" in
-  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
-                    (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
-                    OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-                    [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
-                                      (_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;
-
-  let mayLoad = 1, Constraints = "$src0 = $dst" in
-  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
-              (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
-              [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
-                                      (_.VT (bitconvert
-                                              (_.LdFrag addr:$src))),
-                                      _.RC:$src0)))]>,
-              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 
+              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
 
   let mayLoad = 1 in
-  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
-              (ins _.KRCWM:$mask, _.MemOp:$src),
-              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
-              [(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
-                                      (_.VT (bitconvert (_.LdFrag addr:$src))),
-                                     _.ImmAllZerosV)))]>,
-              EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
+  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
+              (_.VT (X86expand (_.VT (bitconvert
+                                      (_.LdFrag addr:$src1)))))>,
+            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
 }
 
 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -6175,3 +6291,91 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
                                                   EVEX_CD8<32, CD8VF>;
 defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
                                                   EVEX_CD8<64, CD8VF>, VEX_W;
+
+multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           X86VectorVTInfo _> {
+  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                    (ins _.RC:$src1), OpcodeStr##_.Suffix,
+                    "$src1", "$src1",
+                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
+
+  let mayLoad = 1 in
+    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                    (ins _.MemOp:$src1), OpcodeStr##_.Suffix,
+                    "$src1", "$src1",
+                    (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
+              EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            X86VectorVTInfo _> :
+           avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
+  let mayLoad = 1 in
+    defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                    (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix,
+                    "${src1}"##_.BroadcastStr,
+                    "${src1}"##_.BroadcastStr,
+                    (_.VT (OpNode (X86VBroadcast
+                                      (_.ScalarLdFrag addr:$src1))))>,
+               EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+  let Predicates = [prd] in
+    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+                              EVEX_V256;
+    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
+                              EVEX_V128;
+  }
+}
+
+multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+  let Predicates = [prd] in
+    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
+                              EVEX_V512;
+
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
+                                 EVEX_V256;
+    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
+                                 EVEX_V128;
+  }
+}
+
+multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+                                 SDNode OpNode, Predicate prd> {
+  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info,
+                               prd>, VEX_W;
+  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
+                                 SDNode OpNode, Predicate prd> {
+  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>;
+  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>;
+}
+
+multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
+                                  bits<8> opc_d, bits<8> opc_q,
+                                  string OpcodeStr, SDNode OpNode> {
+  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
+                                    HasAVX512>,
+              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
+                                    HasBWI>;
+}
+
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
+
+def : Pat<(xor
+          (bc_v16i32 (v16i1sextv16i32)),
+          (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
+          (VPABSDZrr VR512:$src)>;
+def : Pat<(xor
+          (bc_v8i64 (v8i1sextv8i64)),
+          (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
+          (VPABSQZrr VR512:$src)>;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index eb4dc48..2056056 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -179,6 +179,6 @@ addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
     .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
 }
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 912a0fb..7f850d6 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -869,6 +869,7 @@ def : Pat<(i32 (X86Wrapper tjumptable  :$dst)), (MOV32ri tjumptable  :$dst)>;
 def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
 def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
 def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>;
 def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
 
 def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
@@ -879,6 +880,8 @@ def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
           (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
 def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
           (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)),
+          (ADD32ri GR32:$src1, mcsym:$src2)>;
 def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
           (ADD32ri GR32:$src1, tblockaddress:$src2)>;
 
@@ -886,6 +889,8 @@ def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
           (MOV32mi addr:$dst, tglobaladdr:$src)>;
 def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
           (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst),
+          (MOV32mi addr:$dst, mcsym:$src)>;
 def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
           (MOV32mi addr:$dst, tblockaddress:$src)>;
 
@@ -900,6 +905,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
           (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper mcsym:$dst)),
+          (MOV64ri mcsym:$dst)>, Requires<[FarData]>;
 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
           (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
 
@@ -914,6 +921,8 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
           (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper mcsym:$dst)),
+          (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;
 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
           (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
 
@@ -932,12 +941,15 @@ def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
 def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, texternalsym:$src)>,
           Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, mcsym:$src)>,
+          Requires<[NearData, IsStatic]>;
 def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, tblockaddress:$src)>,
           Requires<[NearData, IsStatic]>;
 
-def : Pat<(i32 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i64 (X86RecoverFrameAlloc texternalsym:$dst)), (MOV64ri texternalsym:$dst)>;
+def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;
+def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;
 
 // Calls
 
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 0dd05d8..49068e9 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -633,16 +633,16 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>;
 def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>;
 def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
 
-def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
-               "fxsave\t$dst", [], IIC_FXSAVE>, TB;
-def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
-                  "fxsave64\t$dst", [], IIC_FXSAVE>, TB,
-                  Requires<[In64BitMode]>;
+def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+               "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
+def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+                  "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], 
+                  IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
 def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
-                "fxrstor\t$src", [], IIC_FXRSTOR>, TB;
+              "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
 def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
-                  "fxrstor64\t$src", [], IIC_FXRSTOR>, TB,
-                  Requires<[In64BitMode]>;
+                   "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
+                   IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
 } // SchedRW
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 16ae77d..fe245c3 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -251,6 +251,7 @@ def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
 
 def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
 def X86VAlign  : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
+def X86Abs     : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
 
 def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
 def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -310,6 +311,7 @@ def X86fsubRnd   : SDNode<"X86ISD::FSUB_RND",  SDTFPBinOpRound>;
 def X86fmulRnd   : SDNode<"X86ISD::FMUL_RND",  SDTFPBinOpRound>;
 def X86fdivRnd   : SDNode<"X86ISD::FDIV_RND",  SDTFPBinOpRound>;
 def X86fmaxRnd   : SDNode<"X86ISD::FMAX_RND",      SDTFPBinOpRound>;
+def X86scalef    : SDNode<"X86ISD::SCALEF",    SDTFPBinOpRound>;
 def X86fminRnd   : SDNode<"X86ISD::FMIN_RND",      SDTFPBinOpRound>;
 def X86fsqrtRnd     : SDNode<"X86ISD::FSQRT_RND",  SDTFPUnaryOpRound>;
 def X86fgetexpRnd   : SDNode<"X86ISD::FGETEXP_RND",  SDTFPUnaryOpRound>;
@@ -347,12 +349,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
 def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
 def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
 
-def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
-                              [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
-                               SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
-def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
-                              [SDTCisSameAs<0, 3>,
-                               SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
+                              [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
+def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
+                              [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
 
 def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
                                SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
@@ -561,6 +561,14 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
   return false;
 }]>;
 
+def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+  (masked_gather node:$src1, node:$src2, node:$src3) , [{
+  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+    return (Mgt->getIndex().getValueType() == MVT::v4i32 ||
+            Mgt->getBasePtr().getValueType() == MVT::v4i32);
+  return false;
+}]>;
+
 def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
   (masked_gather node:$src1, node:$src2, node:$src3) , [{
   if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -569,6 +577,20 @@ def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
   return false;
 }]>;
 
+def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+  (masked_gather node:$src1, node:$src2, node:$src3) , [{
+  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+    return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
+            Mgt->getBasePtr().getValueType() == MVT::v2i64);
+  return false;
+}]>;
+def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+  (masked_gather node:$src1, node:$src2, node:$src3) , [{
+  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
+    return (Mgt->getIndex().getValueType() == MVT::v4i64 ||
+            Mgt->getBasePtr().getValueType() == MVT::v4i64);
+  return false;
+}]>;
 def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
   (masked_gather node:$src1, node:$src2, node:$src3) , [{
   if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -584,6 +606,30 @@ def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
   return false;
 }]>;
 
+def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+  (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+  if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+    return (Sc->getIndex().getValueType() == MVT::v2i64 ||
+            Sc->getBasePtr().getValueType() == MVT::v2i64);
+  return false;
+}]>;
+
+def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+  (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+  if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+    return (Sc->getIndex().getValueType() == MVT::v4i32 ||
+            Sc->getBasePtr().getValueType() == MVT::v4i32);
+  return false;
+}]>;
+
+def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+  (masked_scatter node:$src1, node:$src2, node:$src3) , [{
+  if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
+    return (Sc->getIndex().getValueType() == MVT::v4i64 ||
+            Sc->getBasePtr().getValueType() == MVT::v4i64);
+  return false;
+}]>;
+
 def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
   (masked_scatter node:$src1, node:$src2, node:$src3) , [{
   if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4aa0ae6..b92ba99 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1577,38 +1577,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPXORYrr,          X86::VPXORYrm,           0 },
 
     // FMA4 foldable patterns
-    { X86::VFMADDSS4rr,       X86::VFMADDSS4mr,        0 },
-    { X86::VFMADDSD4rr,       X86::VFMADDSD4mr,        0 },
-    { X86::VFMADDPS4rr,       X86::VFMADDPS4mr,        0 },
-    { X86::VFMADDPD4rr,       X86::VFMADDPD4mr,        0 },
-    { X86::VFMADDPS4rrY,      X86::VFMADDPS4mrY,       0 },
-    { X86::VFMADDPD4rrY,      X86::VFMADDPD4mrY,       0 },
-    { X86::VFNMADDSS4rr,      X86::VFNMADDSS4mr,       0 },
-    { X86::VFNMADDSD4rr,      X86::VFNMADDSD4mr,       0 },
-    { X86::VFNMADDPS4rr,      X86::VFNMADDPS4mr,       0 },
-    { X86::VFNMADDPD4rr,      X86::VFNMADDPD4mr,       0 },
-    { X86::VFNMADDPS4rrY,     X86::VFNMADDPS4mrY,      0 },
-    { X86::VFNMADDPD4rrY,     X86::VFNMADDPD4mrY,      0 },
-    { X86::VFMSUBSS4rr,       X86::VFMSUBSS4mr,        0 },
-    { X86::VFMSUBSD4rr,       X86::VFMSUBSD4mr,        0 },
-    { X86::VFMSUBPS4rr,       X86::VFMSUBPS4mr,        0 },
-    { X86::VFMSUBPD4rr,       X86::VFMSUBPD4mr,        0 },
-    { X86::VFMSUBPS4rrY,      X86::VFMSUBPS4mrY,       0 },
-    { X86::VFMSUBPD4rrY,      X86::VFMSUBPD4mrY,       0 },
-    { X86::VFNMSUBSS4rr,      X86::VFNMSUBSS4mr,       0 },
-    { X86::VFNMSUBSD4rr,      X86::VFNMSUBSD4mr,       0 },
-    { X86::VFNMSUBPS4rr,      X86::VFNMSUBPS4mr,       0 },
-    { X86::VFNMSUBPD4rr,      X86::VFNMSUBPD4mr,       0 },
-    { X86::VFNMSUBPS4rrY,     X86::VFNMSUBPS4mrY,      0 },
-    { X86::VFNMSUBPD4rrY,     X86::VFNMSUBPD4mrY,      0 },
-    { X86::VFMADDSUBPS4rr,    X86::VFMADDSUBPS4mr,     0 },
-    { X86::VFMADDSUBPD4rr,    X86::VFMADDSUBPD4mr,     0 },
-    { X86::VFMADDSUBPS4rrY,   X86::VFMADDSUBPS4mrY,    0 },
-    { X86::VFMADDSUBPD4rrY,   X86::VFMADDSUBPD4mrY,    0 },
-    { X86::VFMSUBADDPS4rr,    X86::VFMSUBADDPS4mr,     0 },
-    { X86::VFMSUBADDPD4rr,    X86::VFMSUBADDPD4mr,     0 },
-    { X86::VFMSUBADDPS4rrY,   X86::VFMSUBADDPS4mrY,    0 },
-    { X86::VFMSUBADDPD4rrY,   X86::VFMSUBADDPD4mrY,    0 },
+    { X86::VFMADDSS4rr,       X86::VFMADDSS4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDSD4rr,       X86::VFMADDSD4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDPS4rr,       X86::VFMADDPS4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDPD4rr,       X86::VFMADDPD4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDPS4rrY,      X86::VFMADDPS4mrY,       TB_ALIGN_NONE },
+    { X86::VFMADDPD4rrY,      X86::VFMADDPD4mrY,       TB_ALIGN_NONE },
+    { X86::VFNMADDSS4rr,      X86::VFNMADDSS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDSD4rr,      X86::VFNMADDSD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDPS4rr,      X86::VFNMADDPS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDPD4rr,      X86::VFNMADDPD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDPS4rrY,     X86::VFNMADDPS4mrY,      TB_ALIGN_NONE },
+    { X86::VFNMADDPD4rrY,     X86::VFNMADDPD4mrY,      TB_ALIGN_NONE },
+    { X86::VFMSUBSS4rr,       X86::VFMSUBSS4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBSD4rr,       X86::VFMSUBSD4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBPS4rr,       X86::VFMSUBPS4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBPD4rr,       X86::VFMSUBPD4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBPS4rrY,      X86::VFMSUBPS4mrY,       TB_ALIGN_NONE },
+    { X86::VFMSUBPD4rrY,      X86::VFMSUBPD4mrY,       TB_ALIGN_NONE },
+    { X86::VFNMSUBSS4rr,      X86::VFNMSUBSS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBSD4rr,      X86::VFNMSUBSD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBPS4rr,      X86::VFNMSUBPS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4rr,      X86::VFNMSUBPD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBPS4rrY,     X86::VFNMSUBPS4mrY,      TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4rrY,     X86::VFNMSUBPD4mrY,      TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4rr,    X86::VFMADDSUBPS4mr,     TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4rr,    X86::VFMADDSUBPD4mr,     TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4rrY,   X86::VFMADDSUBPS4mrY,    TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4rrY,   X86::VFMADDSUBPD4mrY,    TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4rr,    X86::VFMSUBADDPS4mr,     TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4rr,    X86::VFMSUBADDPD4mr,     TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4rrY,   X86::VFMSUBADDPS4mrY,    TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4rrY,   X86::VFMSUBADDPD4mrY,    TB_ALIGN_NONE },
 
     // XOP foldable instructions
     { X86::VPCMOVrr,          X86::VPCMOVmr,            0 },
@@ -1852,38 +1852,38 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VFMSUBADDPDr213rY,     X86::VFMSUBADDPDr213mY,     TB_ALIGN_NONE },
 
     // FMA4 foldable patterns
-    { X86::VFMADDSS4rr,           X86::VFMADDSS4rm,           0           },
-    { X86::VFMADDSD4rr,           X86::VFMADDSD4rm,           0           },
-    { X86::VFMADDPS4rr,           X86::VFMADDPS4rm,           TB_ALIGN_16 },
-    { X86::VFMADDPD4rr,           X86::VFMADDPD4rm,           TB_ALIGN_16 },
-    { X86::VFMADDPS4rrY,          X86::VFMADDPS4rmY,          TB_ALIGN_32 },
-    { X86::VFMADDPD4rrY,          X86::VFMADDPD4rmY,          TB_ALIGN_32 },
-    { X86::VFNMADDSS4rr,          X86::VFNMADDSS4rm,          0           },
-    { X86::VFNMADDSD4rr,          X86::VFNMADDSD4rm,          0           },
-    { X86::VFNMADDPS4rr,          X86::VFNMADDPS4rm,          TB_ALIGN_16 },
-    { X86::VFNMADDPD4rr,          X86::VFNMADDPD4rm,          TB_ALIGN_16 },
-    { X86::VFNMADDPS4rrY,         X86::VFNMADDPS4rmY,         TB_ALIGN_32 },
-    { X86::VFNMADDPD4rrY,         X86::VFNMADDPD4rmY,         TB_ALIGN_32 },
-    { X86::VFMSUBSS4rr,           X86::VFMSUBSS4rm,           0           },
-    { X86::VFMSUBSD4rr,           X86::VFMSUBSD4rm,           0           },
-    { X86::VFMSUBPS4rr,           X86::VFMSUBPS4rm,           TB_ALIGN_16 },
-    { X86::VFMSUBPD4rr,           X86::VFMSUBPD4rm,           TB_ALIGN_16 },
-    { X86::VFMSUBPS4rrY,          X86::VFMSUBPS4rmY,          TB_ALIGN_32 },
-    { X86::VFMSUBPD4rrY,          X86::VFMSUBPD4rmY,          TB_ALIGN_32 },
-    { X86::VFNMSUBSS4rr,          X86::VFNMSUBSS4rm,          0           },
-    { X86::VFNMSUBSD4rr,          X86::VFNMSUBSD4rm,          0           },
-    { X86::VFNMSUBPS4rr,          X86::VFNMSUBPS4rm,          TB_ALIGN_16 },
-    { X86::VFNMSUBPD4rr,          X86::VFNMSUBPD4rm,          TB_ALIGN_16 },
-    { X86::VFNMSUBPS4rrY,         X86::VFNMSUBPS4rmY,         TB_ALIGN_32 },
-    { X86::VFNMSUBPD4rrY,         X86::VFNMSUBPD4rmY,         TB_ALIGN_32 },
-    { X86::VFMADDSUBPS4rr,        X86::VFMADDSUBPS4rm,        TB_ALIGN_16 },
-    { X86::VFMADDSUBPD4rr,        X86::VFMADDSUBPD4rm,        TB_ALIGN_16 },
-    { X86::VFMADDSUBPS4rrY,       X86::VFMADDSUBPS4rmY,       TB_ALIGN_32 },
-    { X86::VFMADDSUBPD4rrY,       X86::VFMADDSUBPD4rmY,       TB_ALIGN_32 },
-    { X86::VFMSUBADDPS4rr,        X86::VFMSUBADDPS4rm,        TB_ALIGN_16 },
-    { X86::VFMSUBADDPD4rr,        X86::VFMSUBADDPD4rm,        TB_ALIGN_16 },
-    { X86::VFMSUBADDPS4rrY,       X86::VFMSUBADDPS4rmY,       TB_ALIGN_32 },
-    { X86::VFMSUBADDPD4rrY,       X86::VFMSUBADDPD4rmY,       TB_ALIGN_32 },
+    { X86::VFMADDSS4rr,           X86::VFMADDSS4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDSD4rr,           X86::VFMADDSD4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDPS4rr,           X86::VFMADDPS4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDPD4rr,           X86::VFMADDPD4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDPS4rrY,          X86::VFMADDPS4rmY,          TB_ALIGN_NONE },
+    { X86::VFMADDPD4rrY,          X86::VFMADDPD4rmY,          TB_ALIGN_NONE },
+    { X86::VFNMADDSS4rr,          X86::VFNMADDSS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDSD4rr,          X86::VFNMADDSD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDPS4rr,          X86::VFNMADDPS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDPD4rr,          X86::VFNMADDPD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDPS4rrY,         X86::VFNMADDPS4rmY,         TB_ALIGN_NONE },
+    { X86::VFNMADDPD4rrY,         X86::VFNMADDPD4rmY,         TB_ALIGN_NONE },
+    { X86::VFMSUBSS4rr,           X86::VFMSUBSS4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBSD4rr,           X86::VFMSUBSD4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBPS4rr,           X86::VFMSUBPS4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBPD4rr,           X86::VFMSUBPD4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBPS4rrY,          X86::VFMSUBPS4rmY,          TB_ALIGN_NONE },
+    { X86::VFMSUBPD4rrY,          X86::VFMSUBPD4rmY,          TB_ALIGN_NONE },
+    { X86::VFNMSUBSS4rr,          X86::VFNMSUBSS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBSD4rr,          X86::VFNMSUBSD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBPS4rr,          X86::VFNMSUBPS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4rr,          X86::VFNMSUBPD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBPS4rrY,         X86::VFNMSUBPS4rmY,         TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4rrY,         X86::VFNMSUBPD4rmY,         TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4rr,        X86::VFMADDSUBPS4rm,        TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4rr,        X86::VFMADDSUBPD4rm,        TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4rrY,       X86::VFMADDSUBPS4rmY,       TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4rrY,       X86::VFMADDSUBPD4rmY,       TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4rr,        X86::VFMSUBADDPS4rm,        TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4rr,        X86::VFMSUBADDPD4rm,        TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4rrY,       X86::VFMSUBADDPS4rmY,       TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4rrY,       X86::VFMSUBADDPD4rmY,       TB_ALIGN_NONE },
 
     // XOP foldable instructions
     { X86::VPCMOVrr,              X86::VPCMOVrm,              0 },
@@ -5295,21 +5295,57 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
                                Size, Alignment, /*AllowCommute=*/true);
 }
 
-static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
-                                  const MachineFunction &MF) {
+/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
+/// because the latter uses contents that wouldn't be defined in the folded
+/// version.  For instance, this transformation isn't legal:
+///   movss (%rdi), %xmm0
+///   addps %xmm0, %xmm0
+/// ->
+///   addps (%rdi), %xmm0
+///
+/// But this one is:
+///   movss (%rdi), %xmm0
+///   addss %xmm0, %xmm0
+/// ->
+///   addss (%rdi), %xmm0
+///
+static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
+                                             const MachineInstr &UserMI,
+                                             const MachineFunction &MF) {
   unsigned Opc = LoadMI.getOpcode();
+  unsigned UserOpc = UserMI.getOpcode();
   unsigned RegSize =
       MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
 
-  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) {
     // These instructions only load 32 bits, we can't fold them if the
-    // destination register is wider than 32 bits (4 bytes).
-    return true;
+    // destination register is wider than 32 bits (4 bytes), and its user
+    // instruction isn't scalar (SS).
+    switch (UserOpc) {
+    case X86::ADDSSrr_Int: case X86::VADDSSrr_Int:
+    case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
+    case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
+    case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+      return false;
+    default:
+      return true;
+    }
+  }
 
-  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) {
     // These instructions only load 64 bits, we can't fold them if the
-    // destination register is wider than 64 bits (8 bytes).
-    return true;
+    // destination register is wider than 64 bits (8 bytes), and its user
+    // instruction isn't scalar (SD).
+    switch (UserOpc) {
+    case X86::ADDSDrr_Int: case X86::VADDSDrr_Int:
+    case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
+    case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
+    case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+      return false;
+    default:
+      return true;
+    }
+  }
 
   return false;
 }
@@ -5321,7 +5357,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   unsigned NumOps = LoadMI->getDesc().getNumOperands();
   int FrameIndex;
   if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
-    if (isPartialRegisterLoad(*LoadMI, MF))
+    if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
       return nullptr;
     return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex);
   }
@@ -5434,7 +5470,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
     break;
   }
   default: {
-    if (isPartialRegisterLoad(*LoadMI, MF))
+    if (isNonFoldablePartialRegisterLoad(*LoadMI, *MI, MF))
       return nullptr;
 
     // Folding a normal load. Just copy the load's address operands.
@@ -6334,22 +6370,11 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel,
   return isHighLatencyDef(DefMI->getOpcode());
 }
 
-/// If the input instruction is part of a chain of dependent ops that are
-/// suitable for reassociation, return the earlier instruction in the sequence
-/// that defines its first operand, otherwise return a nullptr.
-/// If the instruction's operands must be commuted to be considered a
-/// reassociation candidate, Commuted will be set to true.
-static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
-                                        unsigned AssocOpcode,
-                                        bool checkPrevOneUse,
-                                        bool &Commuted) {
-  if (Inst.getOpcode() != AssocOpcode)
-    return nullptr;
-  
-  MachineOperand Op1 = Inst.getOperand(1);
-  MachineOperand Op2 = Inst.getOperand(2);
-  
-  const MachineBasicBlock *MBB = Inst.getParent();
+static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
+                                          const MachineBasicBlock *MBB) {
+  assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators");
+  const MachineOperand &Op1 = Inst.getOperand(1);
+  const MachineOperand &Op2 = Inst.getOperand(2);
   const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
 
   // We need virtual register definitions.
@@ -6359,80 +6384,99 @@ static MachineInstr *isReassocCandidate(const MachineInstr &Inst,
     MI1 = MRI.getUniqueVRegDef(Op1.getReg());
   if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
     MI2 = MRI.getUniqueVRegDef(Op2.getReg());
-  
+
   // And they need to be in the trace (otherwise, they won't have a depth).
-  if (!MI1 || !MI2 || MI1->getParent() != MBB || MI2->getParent() != MBB)
-    return nullptr;
-  
-  Commuted = false;
-  if (MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode) {
+  if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
+    return true;
+
+  return false;
+}
+
+static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
+  const MachineBasicBlock *MBB = Inst.getParent();
+  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+  MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+  unsigned AssocOpcode = Inst.getOpcode();
+
+  // If only one operand has the same opcode and it's the second source operand,
+  // the operands must be commuted.
+  Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+  if (Commuted)
     std::swap(MI1, MI2);
-    Commuted = true;
-  }
 
-  // Avoid reassociating operands when it won't provide any benefit. If both
-  // operands are produced by instructions of this type, we may already
-  // have the optimal sequence.
-  if (MI2->getOpcode() == AssocOpcode)
-    return nullptr;
-  
-  // The instruction must only be used by the other instruction that we
-  // reassociate with.
-  if (checkPrevOneUse && !MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
-    return nullptr;
-  
-  // We must match a simple chain of dependent ops.
-  // TODO: This check is not necessary for the earliest instruction in the
-  // sequence. Instead of a sequence of 3 dependent instructions with the same
-  // opcode, we only need to find a sequence of 2 dependent instructions with
-  // the same opcode plus 1 other instruction that adds to the height of the
-  // trace.
-  if (MI1->getOpcode() != AssocOpcode)
-    return nullptr;
+  // 1. The previous instruction must be the same type as Inst.
+  // 2. The previous instruction must have virtual register definitions for its
+  //    operands in the same basic block as Inst.
+  // 3. The previous instruction's result must only be used by Inst.
+  if (MI1->getOpcode() == AssocOpcode &&
+      hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
+      MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
+    return true;
   
-  return MI1;
+  return false;
 }
 
-/// Select a pattern based on how the operands of each associative operation
-/// need to be commuted.
-static MachineCombinerPattern::MC_PATTERN getPattern(bool CommutePrev,
-                                                     bool CommuteRoot) {
-  if (CommutePrev) {
-    if (CommuteRoot)
-      return MachineCombinerPattern::MC_REASSOC_XA_YB;
-    return MachineCombinerPattern::MC_REASSOC_XA_BY;
-  } else {
-    if (CommuteRoot)
-      return MachineCombinerPattern::MC_REASSOC_AX_YB;
-    return MachineCombinerPattern::MC_REASSOC_AX_BY;
-  }
+/// Return true if the input instruction is part of a chain of dependent ops
+/// that are suitable for reassociation, otherwise return false.
+/// If the instruction's operands must be commuted to have a previous
+/// instruction of the same type define the first source operand, Commuted will
+/// be set to true.
+static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode,
+                               bool &Commuted) {
+  // 1. The instruction must have the correct type.
+  // 2. The instruction must have virtual register definitions for its
+  //    operands in the same basic block.
+  // 3. The instruction must have a reassociatable sibling.
+  if (Inst.getOpcode() == AssocOpcode &&
+      hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
+      hasReassocSibling(Inst, Commuted))
+    return true;
+
+  return false;
 }
 
+// FIXME: This has the potential to be expensive (compile time) while not
+// improving the code at all. Some ways to limit the overhead:
+// 1. Track successful transforms; bail out if hit rate gets too low.
+// 2. Only enable at -O3 or some other non-default optimization level.
+// 3. Pre-screen pattern candidates here: if an operand of the previous
+//    instruction is known to not increase the critical path, then don't match
+//    that pattern.
 bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
         SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
   if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
     return false;
 
+  // TODO: There is nothing x86-specific here except the instruction type.
+  // This logic could be hoisted into the machine combiner pass itself.
+
+  // Look for this reassociation pattern:
+  //   B = A op X (Prev)
+  //   C = B op Y (Root)
+
   // TODO: There are many more associative instruction types to match:
   //       1. Other forms of scalar FP add (non-AVX)
   //       2. Other data types (double, integer, vectors)
   //       3. Other math / logic operations (mul, and, or)
   unsigned AssocOpcode = X86::VADDSSrr;
 
-  // TODO: There is nothing x86-specific here except the instruction type.
-  // This logic could be hoisted into the machine combiner pass itself.
-  bool CommuteRoot;
-  if (MachineInstr *Prev = isReassocCandidate(Root, AssocOpcode, true,
-                                              CommuteRoot)) {
-    bool CommutePrev;
-    if (isReassocCandidate(*Prev, AssocOpcode, false, CommutePrev)) {
-      // We found a sequence of instructions that may be suitable for a
-      // reassociation of operands to increase ILP.
-      Patterns.push_back(getPattern(CommutePrev, CommuteRoot));
-      return true;
+  bool Commute = false;
+  if (isReassocCandidate(Root, AssocOpcode, Commute)) {
+    // We found a sequence of instructions that may be suitable for a
+    // reassociation of operands to increase ILP. Specify each commutation
+    // possibility for the Prev instruction in the sequence and let the
+    // machine combiner decide if changing the operands is worthwhile.
+    if (Commute) {
+      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
+      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
+    } else {
+      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
+      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
     }
+    return true;
   }
-  
+
   return false;
 }
 
@@ -6525,14 +6569,16 @@ void X86InstrInfo::genAlternativeCodeSequence(
 
   // Select the previous instruction in the sequence based on the input pattern.
   MachineInstr *Prev = nullptr;
-  if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_BY ||
-      Pattern == MachineCombinerPattern::MC_REASSOC_XA_BY)
-    Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
-  else if (Pattern == MachineCombinerPattern::MC_REASSOC_AX_YB ||
-           Pattern == MachineCombinerPattern::MC_REASSOC_XA_YB)
-    Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
-  else
-    llvm_unreachable("Unknown pattern for machine combiner");
+  switch (Pattern) {
+    case MachineCombinerPattern::MC_REASSOC_AX_BY:
+    case MachineCombinerPattern::MC_REASSOC_XA_BY:
+      Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+      break;
+    case MachineCombinerPattern::MC_REASSOC_AX_YB:
+    case MachineCombinerPattern::MC_REASSOC_XA_YB:
+      Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+  }
+  assert(Prev && "Unknown pattern for machine combiner");
   
   reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
   return;
@@ -6604,7 +6650,7 @@ namespace {
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 char CGBR::ID = 0;
 FunctionPass*
@@ -6716,7 +6762,7 @@ namespace {
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-} // namespace
+}
 
 char LDTLSCleanup::ID = 0;
 FunctionPass*
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 4912951..bf63336 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -90,7 +90,7 @@ namespace X86 {
   /// GetOppositeBranchCondition - Return the inverse of the specified cond,
   /// e.g. turning COND_E to COND_NE.
   CondCode GetOppositeBranchCondition(CondCode CC);
-} // namespace X86
+}  // end namespace X86;
 
 
 /// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -512,6 +512,6 @@ private:
                       int &FrameIndex) const;
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e936b4b..6f38cb8 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -282,6 +282,10 @@ let RenderMethod = "addMemOperands" in {
   def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; }
   def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; }
   def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; }
+  def X86MemVX32XOperand : AsmOperandClass { let Name = "MemVX32X"; }
+  def X86MemVY32XOperand : AsmOperandClass { let Name = "MemVY32X"; }
+  def X86MemVX64XOperand : AsmOperandClass { let Name = "MemVX64X"; }
+  def X86MemVY64XOperand : AsmOperandClass { let Name = "MemVY64X"; }
 }
 
 def X86AbsMemAsmOperand : AsmOperandClass {
@@ -332,7 +336,11 @@ def vx32mem  : X86VMemOperand<VR128,  "printi32mem", X86MemVX32Operand>;
 def vy32mem  : X86VMemOperand<VR256,  "printi32mem", X86MemVY32Operand>;
 def vx64mem  : X86VMemOperand<VR128,  "printi64mem", X86MemVX64Operand>;
 def vy64mem  : X86VMemOperand<VR256,  "printi64mem", X86MemVY64Operand>;
-def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64Operand>;
+
+def vx32xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX32XOperand>;
+def vx64xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX64XOperand>;
+def vy32xmem : X86VMemOperand<VR256X, "printi32mem", X86MemVY32XOperand>;
+def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>;
 def vz32mem  : X86VMemOperand<VR512,  "printi32mem", X86MemVZ32Operand>;
 def vz64mem  : X86VMemOperand<VR512,  "printi64mem", X86MemVZ64Operand>;
 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 9562918..2a896df 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7860,10 +7860,11 @@ def VBROADCASTSDYrr  : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
                                       int_x86_avx2_vbroadcast_sd_pd_256,
                                       WriteFShuffle256>, VEX_L;
 
-let Predicates = [HasAVX2] in
-def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256,
-                                          i128mem, v4i64, loadv2i64,
-                                          WriteLoad>, VEX_L;
+let mayLoad = 1, Predicates = [HasAVX2] in
+def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
+                           (ins i128mem:$src),
+                           "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
+                           Sched<[WriteLoad]>, VEX, VEX_L;
 
 let Predicates = [HasAVX] in
 def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 2b82930..61a3348 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -21,8 +21,9 @@ enum IntrinsicType {
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
   CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
-  INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK,
-  INTR_TYPE_3OP_MASK, FMA_OP_MASK,
+  INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
+  INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+  VPERM_3OP_MASKZ,
   INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   EXPAND_FROM_MEM, BLEND
 };
@@ -55,6 +56,22 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
   X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
 
+  X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
   X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
   X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
   X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
@@ -129,15 +146,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
-
-  X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
-                     X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
-  X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
-                     X86::VSCATTERPF0DPSm, X86::VSCATTERPF1DPSm),
-  X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH,
-                     X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
-  X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
-                     X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
+  X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
+                     X86::VSCATTERPF1DPDm),
+  X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
+                     X86::VSCATTERPF1DPSm),
+  X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm,
+                     X86::VSCATTERPF1QPDm),
+  X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
+                     X86::VSCATTERPF1QPSm),
+  X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
+  X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
 
   X86_INTRINSIC_DATA(rdpmc,     RDPMC,  X86ISD::RDPMC_DAG, 0),
   X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
@@ -251,6 +283,52 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
   X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+
+  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
+                     X86ISD::FMADD_RND),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD,
+                     X86ISD::FMADD_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+                     X86ISD::FMADDSUB_RND),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_ps_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
+                     X86ISD::FMADDSUB_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_pd_512, FMA_OP_MASK3, X86ISD::FMSUB,
+                     X86ISD::FMSUB_RND),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_128, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
+                     X86ISD::FMSUB_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+                     X86ISD::FMSUBADD_RND),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_ps_512, FMA_OP_MASK3, X86ISD::FMSUBADD,
+                     X86ISD::FMSUBADD_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_pd_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+                     X86ISD::FNMSUB_RND),
+  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_128, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
+                     X86ISD::FNMSUB_RND),
+
   X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
@@ -382,9 +460,9 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::FGETEXP_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM, 
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM, 
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
@@ -393,7 +471,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
-                     X86ISD::FMAX_RND),  
+                     X86ISD::FMAX_RND),
   X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
   X86ISD::FMAX_RND),
   X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
@@ -405,7 +483,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
   X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
   X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
-                     X86ISD::FMIN_RND),  
+                     X86ISD::FMIN_RND),
   X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
   X86ISD::FMIN_RND),
   X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
@@ -428,6 +506,18 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
   X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
@@ -581,6 +671,12 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
   X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
   X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::PSHUFB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
+                    X86ISD::PSHUFB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
+                    X86ISD::PSHUFB, 0),
   X86_INTRINSIC_DATA(avx512_mask_psll_d,        INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
   X86_INTRINSIC_DATA(avx512_mask_psll_q,        INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
   X86_INTRINSIC_DATA(avx512_mask_pslli_d,       VSHIFT_MASK, X86ISD::VSHLI, 0),
@@ -633,6 +729,18 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::RNDSCALE, 0),
   X86_INTRINSIC_DATA(avx512_mask_rndscale_ss,   INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::RNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::SCALEF, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
@@ -667,12 +775,181 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512,    CMP_MASK_CC,  X86ISD::CMPMU, 0),
   X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
+
+  X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
+                     X86ISD::FMADD_RND),
+  X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
+                     X86ISD::FMADD_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+                     X86ISD::FMADDSUB_RND),
+  X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
+                     X86ISD::FMADDSUB_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmadd_pd_512, FMA_OP_MASK, X86ISD::FNMADD,
+                     X86ISD::FNMADD_RND),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_128, FMA_OP_MASK, X86ISD::FNMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_256, FMA_OP_MASK, X86ISD::FNMADD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmadd_ps_512, FMA_OP_MASK, X86ISD::FNMADD,
+                     X86ISD::FNMADD_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmsub_pd_512, FMA_OP_MASK, X86ISD::FNMSUB,
+                     X86ISD::FNMSUB_RND),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_128, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_256, FMA_OP_MASK, X86ISD::FNMSUB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
+                     X86ISD::FNMSUB_RND),
+
+
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMIV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
+                    X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
+
+  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
+                     X86ISD::FMADD_RND),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD,
+                     X86ISD::FMADD_RND),
+
+  X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+                     X86ISD::FMADDSUB_RND),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
+                     X86ISD::FMADDSUB_RND),
+
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
+                     X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_pd,   INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_ps,   INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_sd,   INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
@@ -696,54 +973,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx_vperm2f128_pd_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
   X86_INTRINSIC_DATA(avx_vperm2f128_ps_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
   X86_INTRINSIC_DATA(avx_vperm2f128_si_256, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmadd_pd_512,    FMA_OP_MASK, X86ISD::FMADD,
-                     X86ISD::FMADD_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_128,    FMA_OP_MASK, X86ISD::FMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_256,    FMA_OP_MASK, X86ISD::FMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmadd_ps_512,    FMA_OP_MASK, X86ISD::FMADD,
-                     X86ISD::FMADD_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
-                     X86ISD::FMADDSUB_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmaddsub_ps_512, FMA_OP_MASK, X86ISD::FMADDSUB,
-                     X86ISD::FMADDSUB_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsub_pd_512,    FMA_OP_MASK, X86ISD::FMSUB,
-                     X86ISD::FMSUB_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_128,    FMA_OP_MASK, X86ISD::FMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_256,    FMA_OP_MASK, X86ISD::FMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsub_ps_512,    FMA_OP_MASK, X86ISD::FMSUB,
-                     X86ISD::FMSUB_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_pd_512, FMA_OP_MASK, X86ISD::FMSUBADD,
-                     X86ISD::FMSUBADD_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_128, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_256, FMA_OP_MASK, X86ISD::FMSUBADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfmsubadd_ps_512, FMA_OP_MASK, X86ISD::FMSUBADD,
-                     X86ISD::FMSUBADD_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmadd_pd_512,   FMA_OP_MASK, X86ISD::FNMADD,
-                     X86ISD::FNMADD_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_128,   FMA_OP_MASK, X86ISD::FNMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_256,   FMA_OP_MASK, X86ISD::FNMADD, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmadd_ps_512,   FMA_OP_MASK, X86ISD::FNMADD,
-                     X86ISD::FNMADD_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmsub_pd_512,   FMA_OP_MASK, X86ISD::FNMSUB,
-                     X86ISD::FNMSUB_RND),
-  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_128,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_256,   FMA_OP_MASK, X86ISD::FNMSUB, 0),
-  X86_INTRINSIC_DATA(fma_mask_vfnmsub_ps_512,   FMA_OP_MASK, X86ISD::FNMSUB,
-                     X86ISD::FNMSUB_RND),
   X86_INTRINSIC_DATA(fma_vfmadd_pd,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_pd_256,    INTR_TYPE_3OP, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(fma_vfmadd_ps,        INTR_TYPE_3OP, X86ISD::FMADD, 0),
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 64135e0..3415ced 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -112,7 +112,7 @@ namespace llvm {
     OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
     SMShadowTracker.count(Inst, getSubtargetInfo());
   }
-} // namespace llvm
+} // end llvm namespace
 
 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                                X86AsmPrinter &asmprinter)
@@ -159,10 +159,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
     const GlobalValue *GV = MO.getGlobal();
     AsmPrinter.getNameWithPrefix(Name, GV);
   } else if (MO.isSymbol()) {
-    if (MO.getTargetFlags() == X86II::MO_NOPREFIX)
-      Name += MO.getSymbolName();
-    else
-      getMang()->getNameWithPrefix(Name, MO.getSymbolName());
+    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
   } else if (MO.isMBB()) {
     assert(Suffix.empty());
     Sym = MO.getMBB()->getSymbol();
@@ -241,7 +238,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case X86II::MO_DARWIN_NONLAZY:
   case X86II::MO_DLLIMPORT:
   case X86II::MO_DARWIN_STUB:
-  case X86II::MO_NOPREFIX:
     break;
 
   case X86II::MO_TLVP:      RefKind = MCSymbolRefExpr::VK_TLVP; break;
@@ -423,6 +419,8 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
   case MachineOperand::MO_GlobalAddress:
   case MachineOperand::MO_ExternalSymbol:
     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
+  case MachineOperand::MO_MCSymbol:
+    return LowerSymbolOperand(MO, MO.getMCSymbol());
   case MachineOperand::MO_JumpTableIndex:
     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
   case MachineOperand::MO_ConstantPoolIndex:
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 342d26a..d598b55 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -179,6 +179,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 33aa78f..143e70b 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -84,7 +84,7 @@ namespace {
   };
 
   char PadShortFunc::ID = 0;
-} // namespace
+}
 
 FunctionPass *llvm::createX86PadShortFunctions() {
   return new PadShortFunc();
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 00e2134..0033b50 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -598,10 +598,10 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
 }
 
 namespace llvm {
-unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
-                                bool High) {
+unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT,
+                                      bool High) {
   switch (VT) {
-  default: llvm_unreachable("Unexpected VT");
+  default: return 0;
   case MVT::i8:
     if (High) {
       switch (Reg) {
@@ -625,7 +625,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
       }
     } else {
       switch (Reg) {
-      default: llvm_unreachable("Unexpected register");
+      default: return 0;
       case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
         return X86::AL;
       case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -662,7 +662,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
     }
   case MVT::i16:
     switch (Reg) {
-    default: llvm_unreachable("Unexpected register");
+    default: return 0;
     case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
       return X86::AX;
     case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -698,7 +698,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
     }
   case MVT::i32:
     switch (Reg) {
-    default: llvm_unreachable("Unexpected register");
+    default: return 0;
     case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
       return X86::EAX;
     case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -734,7 +734,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
     }
   case MVT::i64:
     switch (Reg) {
-    default: llvm_unreachable("Unexpected register");
+    default: return 0;
     case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
       return X86::RAX;
     case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -771,6 +771,14 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
   }
 }
 
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
+                                bool High) {
+  unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High);
+  if (Res == 0)
+    llvm_unreachable("Unexpected register or VT");
+  return Res;
+}
+
 unsigned get512BitSuperRegister(unsigned Reg) {
   if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
     return X86::ZMM0 + (Reg - X86::XMM0);
@@ -781,4 +789,4 @@ unsigned get512BitSuperRegister(unsigned Reg) {
   llvm_unreachable("Unexpected SIMD register");
 }
 
-} // namespace llvm
+}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 459ecf7..8de1d0b 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -128,14 +128,19 @@ public:
   unsigned getSlotSize() const { return SlotSize; }
 };
 
-// getX86SubSuperRegister - X86 utility function. It returns the sub or super
-// register of a specific X86 register.
-// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
+/// Returns the sub or super register of a specific X86 register.
+/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+/// Aborts on error.
 unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
 
+/// Returns the sub or super register of a specific X86 register.
+/// Like getX86SubSuperRegister() but returns 0 on error.
+unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType,
+                                      bool High = false);
+
 //get512BitRegister - X86 utility - returns 512-bit super register
 unsigned get512BitSuperRegister(unsigned Reg);
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 25606d3..eb7e0ed 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -48,6 +48,6 @@ public:
                                   MachinePointerInfo SrcPtrInfo) const override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6934061..d420abb 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -490,6 +490,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 3d6eb4f7..fb9cb4b 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -110,12 +110,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
   if (Subtarget.isTargetWin64())
     this->Options.TrapUnreachable = true;
 
-  // TODO: By default, all reciprocal estimate operations are off because
-  // that matches the behavior before TargetRecip was added (except for btver2
-  // which used subtarget features to enable this type of codegen).
-  // We should change this to match GCC behavior where everything but
-  // scalar division estimates are turned on by default with -ffast-math.
-  this->Options.Reciprocals.setDefaults("all", false, 1);
+  // By default (and when -ffast-math is on), enable estimate codegen for
+  // everything except scalar division. By default, use 1 refinement step for
+  // all operations. Defaults may be overridden by using command-line options.
+  // Scalar division estimates are disabled because they break too much
+  // real-world code. These defaults match GCC behavior.
+  this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
+  this->Options.Reciprocals.setDefaults("divf", false, 1);
+  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
+  this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
 
   initAsmInfo();
 }
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index be56888..2629556 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -44,6 +44,6 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index f9f6290..6f900ea 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -131,52 +131,44 @@ static std::string APIntToHexString(const APInt &AI) {
   return HexString;
 }
 
-
 static std::string scalarConstantToHexString(const Constant *C) {
   Type *Ty = C->getType();
-  APInt AI;
   if (isa<UndefValue>(C)) {
-    AI = APInt(Ty->getPrimitiveSizeInBits(), /*val=*/0);
-  } else if (Ty->isFloatTy() || Ty->isDoubleTy()) {
-    const auto *CFP = cast<ConstantFP>(C);
-    AI = CFP->getValueAPF().bitcastToAPInt();
-  } else if (Ty->isIntegerTy()) {
-    const auto *CI = cast<ConstantInt>(C);
-    AI = CI->getValue();
+    return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits()));
+  } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
+    return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
+  } else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
+    return APIntToHexString(CI->getValue());
   } else {
-    llvm_unreachable("unexpected constant pool element type!");
+    unsigned NumElements;
+    if (isa<VectorType>(Ty))
+      NumElements = Ty->getVectorNumElements();
+    else
+      NumElements = Ty->getArrayNumElements();
+    std::string HexString;
+    for (int I = NumElements - 1, E = -1; I != E; --I)
+      HexString += scalarConstantToHexString(C->getAggregateElement(I));
+    return HexString;
   }
-  return APIntToHexString(AI);
 }
 
 MCSection *
 X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
                                                   const Constant *C) const {
-  if (Kind.isReadOnly()) {
-    if (C) {
-      Type *Ty = C->getType();
-      SmallString<32> COMDATSymName;
-      if (Ty->isFloatTy() || Ty->isDoubleTy()) {
-        COMDATSymName = "__real@";
-        COMDATSymName += scalarConstantToHexString(C);
-      } else if (const auto *VTy = dyn_cast<VectorType>(Ty)) {
-        uint64_t NumBits = VTy->getBitWidth();
-        if (NumBits == 128 || NumBits == 256) {
-          COMDATSymName = NumBits == 128 ? "__xmm@" : "__ymm@";
-          for (int I = VTy->getNumElements() - 1, E = -1; I != E; --I)
-            COMDATSymName +=
-                scalarConstantToHexString(C->getAggregateElement(I));
-        }
-      }
-      if (!COMDATSymName.empty()) {
-        unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
-                                   COFF::IMAGE_SCN_MEM_READ |
-                                   COFF::IMAGE_SCN_LNK_COMDAT;
-        return getContext().getCOFFSection(".rdata", Characteristics, Kind,
-                                           COMDATSymName,
-                                           COFF::IMAGE_COMDAT_SELECT_ANY);
-      }
-    }
+  if (Kind.isMergeableConst() && C) {
+    const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                     COFF::IMAGE_SCN_MEM_READ |
+                                     COFF::IMAGE_SCN_LNK_COMDAT;
+    std::string COMDATSymName;
+    if (Kind.isMergeableConst4() || Kind.isMergeableConst8())
+      COMDATSymName = "__real@" + scalarConstantToHexString(C);
+    else if (Kind.isMergeableConst16())
+      COMDATSymName = "__xmm@" + scalarConstantToHexString(C);
+
+    if (!COMDATSymName.empty())
+      return getContext().getCOFFSection(".rdata", Characteristics, Kind,
+                                         COMDATSymName,
+                                         COFF::IMAGE_COMDAT_SELECT_ANY);
   }
 
   return TargetLoweringObjectFile::getSectionForConstant(Kind, C);
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 13384fa..0c82a70 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1130,3 +1130,18 @@ bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
   return isLegalMaskedLoad(DataType, Consecutive);
 }
 
+bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller,
+                                                 const Function *Callee) const {
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  // Work this as a subsetting of subtarget features.
+  const FeatureBitset &CallerBits =
+      TM.getSubtargetImpl(*Caller)->getFeatureBits();
+  const FeatureBitset &CalleeBits =
+      TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+  // FIXME: This is likely too limiting as it will include subtarget features
+  // that we might not care about for inlining, but it is conservatively
+  // correct.
+  return (CallerBits & CalleeBits) == CalleeBits;
+}
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index e570bb5..a831584 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -103,6 +103,8 @@ public:
                          Type *Ty);
   bool isLegalMaskedLoad(Type *DataType, int Consecutive);
   bool isLegalMaskedStore(Type *DataType, int Consecutive);
+  bool hasCompatibleFunctionAttributes(const Function *Caller,
+                                       const Function *Callee) const;
 
   /// @}
 };
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 71ce45b..6925b27 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -86,7 +86,7 @@ namespace {
   };
 
   char VZeroUpperInserter::ID = 0;
-} // namespace
+}
 
 FunctionPass *llvm::createX86IssueVZeroUpperPass() {
   return new VZeroUpperInserter();
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index c9e8094..9035725 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -105,7 +105,7 @@ private:
   /// The linked list node subobject inside of RegNode.
   Value *Link = nullptr;
 };
-} // namespace
+}
 
 FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
 
@@ -398,6 +398,7 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
 
   // Set up RegNodeEscapeIndex
   int RegNodeEscapeIndex = escapeRegNode(F);
+  FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
 
   // Only insert stores in catch handlers.
   Constant *FI8 =
@@ -480,8 +481,8 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
   WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
 
   // Remember and return the index that we used. We save it in WinEHFuncInfo so
-  // that we can lower llvm.x86.seh.exceptioninfo later in filter functions
-  // without too much trouble.
+  // that we can lower llvm.x86.seh.recoverfp later in filter functions without
+  // too much trouble.
   int RegNodeEscapeIndex = escapeRegNode(F);
   FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
 
@@ -528,14 +529,12 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
     }
   }
 
-  // Insert llvm.stackrestore into each __except block.
-  Function *StackRestore =
-      Intrinsic::getDeclaration(TheModule, Intrinsic::stackrestore);
+  // Insert llvm.x86.seh.restoreframe() into each __except block.
+  Function *RestoreFrame =
+      Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe);
   for (BasicBlock *ExceptBB : ExceptBlocks) {
     IRBuilder<> Builder(ExceptBB->begin());
-    Value *SP =
-        Builder.CreateLoad(Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
-    Builder.CreateCall(StackRestore, {SP});
+    Builder.CreateCall(RestoreFrame, {});
   }
 }
 
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index e1baeac..2e44ac9 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -40,7 +40,7 @@ public:
                               raw_ostream &VStream,
                               raw_ostream &CStream) const override;
 };
-} // namespace
+}
 
 static bool readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
                               uint64_t &Size, uint16_t &Insn) {
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 8699ce8..ac954d0 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -123,7 +123,7 @@ void XCoreTargetAsmStreamer::emitCCBottomData(StringRef Name) {
 void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
   OS << "\t.cc_bottom " << Name << ".function\n";
 }
-} // namespace
+}
 
 static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
                                                  formatted_raw_ostream &OS,
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index eb8b5ec..ba6ca84 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -32,6 +32,6 @@ namespace llvm {
                                    CodeGenOpt::Level OptLevel);
   ModulePass *createXCoreLowerThreadLocalPass();
 
-} // namespace llvm
+} // end namespace llvm;
 
 #endif
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index 116e89a..607c772 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -58,6 +58,6 @@ namespace llvm {
       return 4;
     }
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index 8d96105..77292c4 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -34,7 +34,7 @@ namespace {
     }
   };
   char XCoreFTAOElim::ID = 0;
-} // namespace
+}
 
 /// createXCoreFrameToArgsOffsetEliminationPass - returns an instance of the
 /// Frame to args offset elimination pass
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 9c49a8d..97f0494 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -85,7 +85,7 @@ namespace llvm {
       // Memory barrier.
       MEMBARRIER
     };
-  } // namespace XCoreISD
+  }
 
   //===--------------------------------------------------------------------===//
   // TargetLowering Implementation
@@ -215,6 +215,6 @@ namespace llvm {
                      const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
                      LLVMContext &Context) const override;
   };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index a6e974e..ee30344 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -41,7 +41,7 @@ namespace XCore {
     COND_INVALID
   };
 }
-} // namespace llvm
+}
 
 // Pin the vtable to this file.
 void XCoreInstrInfo::anchor() {}
@@ -196,15 +196,10 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                               SmallVectorImpl<MachineOperand> &Cond,
                               bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
     return false;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return false;
-    --I;
-  }
+
   if (!isUnpredicatedTerminator(I))
     return false;
 
@@ -312,14 +307,10 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
 
 unsigned
 XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return 0;
-    --I;
-  }
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return 0;
+
   if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
     return 0;
   
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 70beb41..b958c36 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -88,6 +88,6 @@ public:
                                             unsigned Reg, uint64_t Value) const;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index f866ab0..996c6f5 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -50,7 +50,7 @@ namespace {
 
     bool runOnModule(Module &M) override;
   };
-} // namespace
+}
 
 char XCoreLowerThreadLocal::ID = 0;
 
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
index 74a7f20..5691478 100644
--- a/lib/Target/XCore/XCoreMCInstLower.h
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -37,6 +37,6 @@ private:
   MCOperand LowerSymbolOperand(const MachineOperand &MO,
                                MachineOperandType MOTy, unsigned Offset) const;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 8cce75f..078ffde 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -101,6 +101,6 @@ public:
     return SpillLabels;
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 6224843..cfd80b3 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -35,6 +35,6 @@ public:
                           MachinePointerInfo SrcPtrInfo) const override;
 };
 
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 74ee594..f01fb67 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -61,6 +61,6 @@ public:
     return &InstrInfo.getRegisterInfo();
   }
 };
-} // namespace llvm
+} // End llvm namespace
 
 #endif
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index a82702f..3563dbc 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -22,6 +22,6 @@ public:
   virtual void emitCCBottomData(StringRef Name) = 0;
   virtual void emitCCBottomFunction(StringRef Name) = 0;
 };
-} // namespace llvm
+}
 
 #endif
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index f90aafc..29b9bb8 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -35,7 +35,7 @@ namespace {
       return false;
     }
   };
-} // namespace
+}
 
 char Hello::ID = 0;
 static RegisterPass<Hello> X("hello", "Hello World Pass");
@@ -58,7 +58,7 @@ namespace {
       AU.setPreservesAll();
     }
   };
-} // namespace
+}
 
 char Hello2::ID = 0;
 static RegisterPass<Hello2>
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 86b3faa..f754363 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -92,7 +92,7 @@ namespace {
     unsigned maxElements;
     DenseMap<const Function *, DISubprogram *> FunctionDIs;
   };
-} // namespace
+}
 
 char ArgPromotion::ID = 0;
 INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp
index 7585fdc..6af1043 100644
--- a/lib/Transforms/IPO/BarrierNoopPass.cpp
+++ b/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -38,7 +38,7 @@ public:
 
   bool runOnModule(Module &M) override { return false; }
 };
-} // namespace
+}
 
 ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); }
 
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 3b68743..8ce7646 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -53,7 +53,7 @@ namespace {
     unsigned getAlignment(GlobalVariable *GV) const;
 
   };
-} // namespace
+}
 
 char ConstantMerge::ID = 0;
 INITIALIZE_PASS(ConstantMerge, "constmerge",
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 6bfd3d1..76898f2 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -159,7 +159,7 @@ namespace {
     bool DeleteDeadVarargs(Function &Fn);
     bool RemoveDeadArgumentsFromCallers(Function &Fn);
   };
-} // namespace
+}
 
 
 char DAE::ID = 0;
@@ -175,7 +175,7 @@ namespace {
 
     bool ShouldHackArguments() const override { return true; }
   };
-} // namespace
+}
 
 char DAH::ID = 0;
 INITIALIZE_PASS(DAH, "deadarghaX0r", 
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 7e0dddc..2f8c7d9 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -146,7 +146,7 @@ namespace {
   };
 
   char GVExtractorPass::ID = 0;
-} // namespace
+}
 
 ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue *> &GVs,
                                          bool deleteFn) {
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 749ff99..bb5e64a 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -132,7 +132,7 @@ namespace {
     AliasAnalysis *AA;
     TargetLibraryInfo *TLI;
   };
-} // namespace
+}
 
 char FunctionAttrs::ID = 0;
 INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
@@ -379,7 +379,7 @@ namespace {
 
     const SmallPtrSet<Function*, 8> &SCCNodes;
   };
-} // namespace
+}
 
 namespace llvm {
   template<> struct GraphTraits<ArgumentGraphNode*> {
@@ -406,7 +406,7 @@ namespace llvm {
       return AG->end();
     }
   };
-} // namespace llvm
+}
 
 // Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
 static Attribute::AttrKind
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 7983104..61d0ff9 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -57,7 +57,7 @@ namespace {
 
     bool RemoveUnusedGlobalValue(GlobalValue &GV);
   };
-} // namespace
+}
 
 /// Returns true if F contains only a single "ret" instruction.
 static bool isEmptyFunction(Function *F) {
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 0d83c82..5ffe15d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -89,7 +89,7 @@ namespace {
     TargetLibraryInfo *TLI;
     SmallSet<const Comdat *, 8> NotDiscardableComdats;
   };
-} // namespace
+}
 
 char GlobalOpt::ID = 0;
 INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
@@ -1992,11 +1992,9 @@ isSimpleEnoughValueToCommitHelper(Constant *C,
   // Aggregate values are safe if all their elements are.
   if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
       isa<ConstantVector>(C)) {
-    for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-      Constant *Op = cast<Constant>(C->getOperand(i));
-      if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, DL))
+    for (Value *Op : C->operands())
+      if (!isSimpleEnoughValueToCommit(cast<Constant>(Op), SimpleConstants, DL))
         return false;
-    }
     return true;
   }
 
@@ -2786,7 +2784,7 @@ public:
       setUsedInitializer(*CompilerUsedV, CompilerUsed);
   }
 };
-} // namespace
+}
 
 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
   if (GA.use_empty()) // No use at all.
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index d717b25..af541d1 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -45,7 +45,7 @@ namespace {
     bool PropagateConstantsIntoArguments(Function &F);
     bool PropagateConstantReturn(Function &F);
   };
-} // namespace
+}
 
 char IPCP::ID = 0;
 INITIALIZE_PASS(IPCP, "ipconstprop",
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 37ff091..dc56a02 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -62,7 +62,7 @@ public:
   }
 };
 
-} // namespace
+}
 
 char AlwaysInliner::ID = 0;
 INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 93cdba6..5273c3d 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -199,8 +199,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
     // set to keep track of which "available" allocas are being used by this
     // function.  Also, AllocasForType can be empty of course!
     bool MergedAwayAlloca = false;
-    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
-      AllocaInst *AvailableAlloca = AllocasForType[i];
+    for (AllocaInst *AvailableAlloca : AllocasForType) {
 
       unsigned Align1 = AI->getAlignment(),
                Align2 = AvailableAlloca->getAlignment();
@@ -482,7 +481,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   // If there are no calls in this function, exit early.
   if (CallSites.empty())
     return false;
-  
+
   // Now that we have all of the call sites, move the ones to functions in the
   // current SCC to the end of the list.
   unsigned FirstCallInSCC = CallSites.size();
@@ -592,7 +591,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
         DEBUG(dbgs() << "    -> Deleting dead function: "
               << Callee->getName() << "\n");
         CallGraphNode *CalleeNode = CG[Callee];
-        
+
         // Remove any call graph edges from the callee to its callees.
         CalleeNode->removeAllCalledFunctions();
         
@@ -648,8 +647,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
 
   // Scan for all of the functions, looking for ones that should now be removed
   // from the program.  Insert the dead ones in the FunctionsToRemove set.
-  for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
-    CallGraphNode *CGN = I->second;
+  for (auto I : CG) {
+    CallGraphNode *CGN = I.second;
     Function *F = CGN->getFunction();
     if (!F || F->isDeclaration())
       continue;
@@ -724,10 +723,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
   FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
                                       FunctionsToRemove.end()),
                           FunctionsToRemove.end());
-  for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
-                                                  E = FunctionsToRemove.end();
-       I != E; ++I) {
-    delete CG.removeFunctionFromModule(*I);
+  for (CallGraphNode *CGN : FunctionsToRemove) {
+    delete CG.removeFunctionFromModule(CGN);
     ++NumDeleted;
   }
   return true;
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index ada4a76..41334ca 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -51,7 +51,7 @@ namespace {
       AU.addRequired<DominatorTreeWrapperPass>();
     }
   };
-} // namespace
+}
 
 char LoopExtractor::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
@@ -183,7 +183,7 @@ namespace {
 
     bool runOnModule(Module &M) override;
   };
-} // namespace
+}
 
 char BlockExtractorPass::ID = 0;
 INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp
index bffeebb..c6795c6 100644
--- a/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/lib/Transforms/IPO/LowerBitSets.cpp
@@ -271,8 +271,10 @@ BitSetInfo LowerBitSets::buildBitSet(
     for (MDNode *Op : BitSetNM->operands()) {
       if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
         continue;
-      auto OpGlobal = cast<GlobalVariable>(
+      auto OpGlobal = dyn_cast<GlobalVariable>(
           cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+      if (!OpGlobal)
+        continue;
       uint64_t Offset =
           cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
                                 ->getValue())->getZExtValue();
@@ -621,7 +623,7 @@ bool LowerBitSets::buildBitSets() {
         report_fatal_error("Bit set element must be a constant");
       auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
       if (!OpGlobal)
-        report_fatal_error("Bit set element must refer to global");
+        continue;
 
       auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
       if (!OffsetConstMD)
@@ -675,8 +677,10 @@ bool LowerBitSets::buildBitSets() {
         if (I == BitSetIndices.end())
           continue;
 
-        auto OpGlobal = cast<GlobalVariable>(
+        auto OpGlobal = dyn_cast<GlobalVariable>(
             cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+        if (!OpGlobal)
+          continue;
         BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
       }
     }
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 5e41798..2e3519e 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -409,7 +409,7 @@ public:
     return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
   }
 };
-} // namespace
+}
 
 int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
   if (L < R) return -1;
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 7a7065c..4a7cb7b 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -40,7 +40,7 @@ namespace {
   private:
     Function* unswitchFunction(Function* F);
   };
-} // namespace
+}
 
 char PartialInliner::ID = 0;
 INITIALIZE_PASS(PartialInliner, "partial-inliner",
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index a5ba9ee..b2f1010 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
@@ -49,7 +50,7 @@ namespace {
     bool SimplifyFunction(Function *F);
     void DeleteBasicBlock(BasicBlock *BB);
   };
-} // namespace
+}
 
 char PruneEH::ID = 0;
 INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
@@ -97,42 +98,54 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
     } else {
       bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow();
       bool CheckReturn = !SCCMightReturn && !F->doesNotReturn();
+      // Determine if we should scan for InlineAsm in a naked function as it
+      // is the only way to return without a ReturnInst.  Only do this for
+      // no-inline functions as functions which may be inlined cannot
+      // meaningfully return via assembly.
+      bool CheckReturnViaAsm = CheckReturn &&
+                               F->hasFnAttribute(Attribute::Naked) &&
+                               F->hasFnAttribute(Attribute::NoInline);
 
       if (!CheckUnwind && !CheckReturn)
         continue;
 
-      // Check to see if this function performs an unwind or calls an
-      // unwinding function.
-      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-        if (CheckUnwind && isa<ResumeInst>(BB->getTerminator())) {
-          // Uses unwind / resume!
+      for (const BasicBlock &BB : *F) {
+        const TerminatorInst *TI = BB.getTerminator();
+        if (CheckUnwind && TI->mayThrow()) {
           SCCMightUnwind = true;
-        } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
+        } else if (CheckReturn && isa<ReturnInst>(TI)) {
           SCCMightReturn = true;
         }
 
-        // Invoke instructions don't allow unwinding to continue, so we are
-        // only interested in call instructions.
-        if (CheckUnwind && !SCCMightUnwind)
-          for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-            if (CallInst *CI = dyn_cast<CallInst>(I)) {
-              if (CI->doesNotThrow()) {
-                // This call cannot throw.
-              } else if (Function *Callee = CI->getCalledFunction()) {
+        for (const Instruction &I : BB) {
+          if ((!CheckUnwind || SCCMightUnwind) &&
+              (!CheckReturnViaAsm || SCCMightReturn))
+            break;
+
+          // Check to see if this function performs an unwind or calls an
+          // unwinding function.
+          if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) {
+            bool InstMightUnwind = true;
+            if (const auto *CI = dyn_cast<CallInst>(&I)) {
+              if (Function *Callee = CI->getCalledFunction()) {
                 CallGraphNode *CalleeNode = CG[Callee];
-                // If the callee is outside our current SCC then we may
-                // throw because it might.
-                if (!SCCNodes.count(CalleeNode)) {
-                  SCCMightUnwind = true;
-                  break;
-                }
-              } else {
-                // Indirect call, it might throw.
-                SCCMightUnwind = true;
-                break;
+                // If the callee is outside our current SCC then we may throw
+                // because it might.  If it is inside, do nothing.
+                if (SCCNodes.count(CalleeNode) > 0)
+                  InstMightUnwind = false;
               }
             }
-        if (SCCMightUnwind && SCCMightReturn) break;
+            SCCMightUnwind |= InstMightUnwind;
+          }
+          if (CheckReturnViaAsm && !SCCMightReturn)
+            if (auto ICS = ImmutableCallSite(&I))
+              if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue()))
+                if (IA->hasSideEffects())
+                  SCCMightReturn = true;
+        }
+
+        if (SCCMightUnwind && SCCMightReturn)
+          break;
       }
     }
   }
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 6f9af1d..a4f30c5 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -95,7 +95,7 @@ namespace {
       AU.setPreservesAll();
     }
   };
-} // namespace
+}
 
 char StripSymbols::ID = 0;
 INITIALIZE_PASS(StripSymbols, "strip",
@@ -142,9 +142,9 @@ static bool OnlyUsedBy(Value *V, Value *Usr) {
 static void RemoveDeadConstant(Constant *C) {
   assert(C->use_empty() && "Constant is not dead!");
   SmallPtrSet<Constant*, 4> Operands;
-  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
-    if (OnlyUsedBy(C->getOperand(i), C))
-      Operands.insert(cast<Constant>(C->getOperand(i)));
+  for (Value *Op : C->operands())
+    if (OnlyUsedBy(Op, C))
+      Operands.insert(cast<Constant>(Op));
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
     if (!GV->hasLocalLinkage()) return;   // Don't delete non-static globals.
     GV->eraseFromParent();
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 29ecc1d..2d2c109f 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -193,7 +193,7 @@ namespace {
       void incCreateInstNum() {}
     #endif
   };
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 //
@@ -1611,6 +1611,32 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(A, B);
   }
 
+  // (sub (select (a, c, b)), (select (a, d, b))) -> (select (a, (sub c, d), 0))
+  // (sub (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (sub c, d)))
+  if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
+    if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
+      if (SI0->getCondition() == SI1->getCondition()) {
+        if (Value *V = SimplifySubInst(
+                SI0->getFalseValue(), SI1->getFalseValue(), I.hasNoSignedWrap(),
+                I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+          return SelectInst::Create(
+              SI0->getCondition(),
+              Builder->CreateSub(SI0->getTrueValue(), SI1->getTrueValue(), "",
+                                 /*HasNUW=*/I.hasNoUnsignedWrap(),
+                                 /*HasNSW=*/I.hasNoSignedWrap()),
+              V);
+        if (Value *V = SimplifySubInst(SI0->getTrueValue(), SI1->getTrueValue(),
+                                       I.hasNoSignedWrap(),
+                                       I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+          return SelectInst::Create(
+              SI0->getCondition(), V,
+              Builder->CreateSub(SI0->getFalseValue(), SI1->getFalseValue(), "",
+                                 /*HasNUW=*/I.hasNoUnsignedWrap(),
+                                 /*HasNSW=*/I.hasNoSignedWrap()));
+      }
+    }
+  }
+
   if (Op0->hasOneUse()) {
     Value *Y = nullptr;
     // ((X | Y) - X) --> (~X & Y)
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f53eeef..010b7b5 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2646,7 +2646,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
     Changed = true;
   }
 
-  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC))
+  if (Value *V =
+          SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I))
     return ReplaceInstUsesWith(I, V);
 
   // comparing -val or val with non-zero is the same as just comparing val
@@ -3927,7 +3928,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC))
+  if (Value *V =
+          SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I))
     return ReplaceInstUsesWith(I, V);
 
   // Simplify 'fcmp pred X, X'
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 6b384b4..a554e9f 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -948,7 +948,7 @@ struct UDivFoldAction {
   UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS)
       : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {}
 };
-} // namespace
+}
 
 // X udiv 2^C -> X >> C
 static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1,
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index a93ffbe..460f6eb 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -19,9 +19,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "instcombine"
 
-/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
-/// and if a/b/c and the add's all have a single use, turn this into a phi
-/// and a single binop.
+/// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the
+/// adds all have a single use, turn this into a phi and a single binop.
 Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
   assert(isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst));
@@ -238,10 +237,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
 }
 
 
-/// isSafeAndProfitableToSinkLoad - Return true if we know that it is safe to
-/// sink the load out of the block that defines it.  This means that it must be
-/// obvious the value of the load is not changed from the point of the load to
-/// the end of the block it is in.
+/// Return true if we know that it is safe to sink the load out of the block
+/// that defines it. This means that it must be obvious the value of the load is
+/// not changed from the point of the load to the end of the block it is in.
 ///
 /// Finally, it is safe, but not profitable, to sink a load targeting a
 /// non-address-taken alloca.  Doing so will cause us to not promote the alloca
@@ -385,9 +383,9 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
 
 
-/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
-/// operator and they all are only used by the PHI, PHI together their
-/// inputs, and do the operation once, to the result of the PHI.
+/// If all operands to a PHI node are the same "unary" operator and they all are
+/// only used by the PHI, PHI together their inputs, and do the operation once,
+/// to the result of the PHI.
 Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
 
@@ -503,8 +501,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   return NewCI;
 }
 
-/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
-/// that is dead.
+/// Return true if this PHI node is only used by a PHI node cycle that is dead.
 static bool DeadPHICycle(PHINode *PN,
                          SmallPtrSetImpl<PHINode*> &PotentiallyDeadPHIs) {
   if (PN->use_empty()) return true;
@@ -524,8 +521,8 @@ static bool DeadPHICycle(PHINode *PN,
   return false;
 }
 
-/// PHIsEqualValue - Return true if this phi node is always equal to
-/// NonPhiInVal.  This happens with mutually cyclic phi nodes like:
+/// Return true if this phi node is always equal to NonPhiInVal.
+/// This happens with mutually cyclic phi nodes like:
 ///   z = some value; x = phi (y, z); y = phi (x, z)
 static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
                            SmallPtrSetImpl<PHINode*> &ValueEqualPHIs) {
@@ -582,7 +579,7 @@ struct LoweredPHIRecord {
   LoweredPHIRecord(PHINode *pn, unsigned Sh)
     : PN(pn), Shift(Sh), Width(0) {}
 };
-} // namespace
+}
 
 namespace llvm {
   template<>
@@ -603,13 +600,13 @@ namespace llvm {
              LHS.Width == RHS.Width;
     }
   };
-} // namespace llvm
+}
 
 
-/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
-/// illegal type: see if it is only used by trunc or trunc(lshr) operations.  If
-/// so, we split the PHI into the various pieces being extracted.  This sort of
-/// thing is introduced when SROA promotes an aggregate to large integer values.
+/// This is an integer PHI and we know that it has an illegal type: see if it is
+/// only used by trunc or trunc(lshr) operations. If so, we split the PHI into
+/// the various pieces being extracted. This sort of thing is introduced when
+/// SROA promotes an aggregate to large integer values.
 ///
 /// TODO: The user of the trunc may be an bitcast to float/double/vector or an
 /// inttoptr.  We should produce new PHIs in the right type.
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 53950ae..2a81689 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2125,7 +2125,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
 
   // Truncate the condition operand if the new type is equal to or larger than
   // the largest legal integer type. We need to be conservative here since
-  // x86 generates redundant zero-extenstion instructions if the operand is
+  // x86 generates redundant zero-extension instructions if the operand is
   // truncated to i8 or i16.
   bool TruncCond = false;
   if (NewWidth > 0 && BitWidth > NewWidth &&
@@ -3046,7 +3046,7 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override;
   bool runOnFunction(Function &F) override;
 };
-} // namespace
+}
 
 void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 2dd2fe6..e7ef9f9 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1144,6 +1144,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
 
     // Globals from llvm.metadata aren't emitted, do not instrument them.
     if (Section == "llvm.metadata") return false;
+    // Do not instrument globals from special LLVM sections.
+    if (Section.find("__llvm") != StringRef::npos) return false;
 
     // Callbacks put into the CRT initializer/terminator sections
     // should not be instrumented.
@@ -1672,12 +1674,6 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
   }
 }
 
-static DebugLoc getFunctionEntryDebugLocation(Function &F) {
-  for (const auto &Inst : F.getEntryBlock())
-    if (!isa<AllocaInst>(Inst)) return Inst.getDebugLoc();
-  return DebugLoc();
-}
-
 PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond,
                                           Value *ValueIfTrue,
                                           Instruction *ThenTerm,
@@ -1730,7 +1726,9 @@ void FunctionStackPoisoner::poisonStack() {
   if (AllocaVec.size() == 0) return;
 
   int StackMallocIdx = -1;
-  DebugLoc EntryDebugLocation = getFunctionEntryDebugLocation(F);
+  DebugLoc EntryDebugLocation;
+  if (auto SP = getDISubprogram(&F))
+    EntryDebugLocation = DebugLoc::get(SP->getScopeLine(), 0, SP);
 
   Instruction *InsBefore = AllocaVec[0];
   IRBuilder<> IRB(InsBefore);
@@ -1753,11 +1751,10 @@ void FunctionStackPoisoner::poisonStack() {
   uint64_t LocalStackSize = L.FrameSize;
   bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel &&
                        LocalStackSize <= kMaxStackMallocSize;
-  // Don't do dynamic alloca in presence of inline asm: too often it makes
-  // assumptions on which registers are available. Don't do stack malloc in the
-  // presence of inline asm on 32-bit platforms for the same reason.
+  // Don't do dynamic alloca or stack malloc in presence of inline asm:
+  // too often it makes assumptions on which registers are available.
   bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm;
-  DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32;
+  DoStackMalloc &= !HasNonEmptyInlineAsm;
 
   Value *StaticAlloca =
       DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index a887425..f685803 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -63,7 +63,7 @@ namespace {
     void emitBranchToTrap(Value *Cmp = nullptr);
     bool instrument(Value *Ptr, Value *Val, const DataLayout &DL);
  };
-} // namespace
+}
 
 char BoundsChecking::ID = 0;
 INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking",
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 4309157..2de6e1a 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -346,7 +346,7 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> {
   void visitMemTransferInst(MemTransferInst &I);
 };
 
-} // namespace
+}
 
 char DataFlowSanitizer::ID;
 INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 43caf1f..9a3ed5c 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -139,7 +139,7 @@ namespace {
     LLVMContext *Ctx;
     SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
   };
-} // namespace
+}
 
 char GCOVProfiler::ID = 0;
 INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
@@ -419,7 +419,7 @@ namespace {
     DenseMap<BasicBlock *, GCOVBlock> Blocks;
     GCOVBlock ReturnBlock;
   };
-} // namespace
+}
 
 std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
                                      const char *NewStem) {
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 05a9c8a..712bf8e 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -362,7 +362,7 @@ void InstrProfiling::emitInitialization() {
         Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
                          "__llvm_profile_override_default_filename", M);
 
-    // Create variable for profile name
+    // Create variable for profile name.
     Constant *ProfileNameConst =
         ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
     GlobalVariable *ProfileName =
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 63eee2f..286a563 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -236,6 +236,14 @@ static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
   0x002000000000,  // OriginBase
 };
 
+// ppc64 Linux
+static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
+  0x200000000000,  // AndMask
+  0x100000000000,  // XorMask
+  0x080000000000,  // ShadowBase
+  0x1C0000000000,  // OriginBase
+};
+
 // i386 FreeBSD
 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
   0x000180000000,  // AndMask
@@ -262,6 +270,11 @@ static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
   &Linux_MIPS64_MemoryMapParams,
 };
 
+static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
+  NULL,
+  &Linux_PowerPC64_MemoryMapParams,
+};
+
 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
   &FreeBSD_I386_MemoryMapParams,
   &FreeBSD_X86_64_MemoryMapParams,
@@ -479,6 +492,10 @@ bool MemorySanitizer::doInitialization(Module &M) {
         case Triple::mips64el:
           MapParams = Linux_MIPS_MemoryMapParams.bits64;
           break;
+        case Triple::ppc64:
+        case Triple::ppc64le:
+          MapParams = Linux_PowerPC_MemoryMapParams.bits64;
+          break;
         default:
           report_fatal_error("unsupported architecture");
       }
diff --git a/lib/Transforms/Instrumentation/SafeStack.cpp b/lib/Transforms/Instrumentation/SafeStack.cpp
index 13c5412..6b185a2 100644
--- a/lib/Transforms/Instrumentation/SafeStack.cpp
+++ b/lib/Transforms/Instrumentation/SafeStack.cpp
@@ -165,7 +165,7 @@ class SafeStack : public FunctionPass {
   Type *Int32Ty;
   Type *Int8Ty;
 
-  Constant *UnsafeStackPtr;
+  Constant *UnsafeStackPtr = nullptr;
 
   /// Unsafe stack alignment. Each stack frame must ensure that the stack is
   /// aligned to this value. We need to re-align the unsafe stack if the
@@ -232,8 +232,6 @@ public:
     Int32Ty = Type::getInt32Ty(M.getContext());
     Int8Ty = Type::getInt8Ty(M.getContext());
 
-    UnsafeStackPtr = getOrCreateUnsafeStackPtr(M);
-
     return false;
   }
 
@@ -576,6 +574,9 @@ bool SafeStack::runOnFunction(Function &F) {
   if (!StackRestorePoints.empty())
     ++NumUnsafeStackRestorePointsFunctions;
 
+  if (!UnsafeStackPtr)
+    UnsafeStackPtr = getOrCreateUnsafeStackPtr(*F.getParent());
+
   // The top of the unsafe stack after all unsafe static allocas are allocated.
   Value *StaticTop = moveStaticAllocasToUnsafeStack(F, StaticAllocas, Returns);
 
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index dff39ef..7a5b4cb 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -375,6 +375,13 @@ void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
 
 void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
                                                     bool UseCalls) {
+  // Don't insert coverage for unreachable blocks: we will never call
+  // __sanitizer_cov() for them, so counting them in
+  // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
+  // percentage. Also, unreachable instructions frequently have no debug
+  // locations.
+  if (isa<UnreachableInst>(BB.getTerminator()))
+    return;
   BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
   // Skip static allocas at the top of the entry block so they don't become
   // dynamic when we split the block.  If we used our optimized stack layout,
diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h
index f9fde26..d6439b6 100644
--- a/lib/Transforms/ObjCARC/BlotMapVector.h
+++ b/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -105,4 +105,4 @@ public:
     return Map.empty();
   }
 };
-} // namespace llvm
+} //
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index c7c77ec..d318643 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -50,7 +50,7 @@ namespace {
       initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
     }
   };
-} // namespace
+}
 
 char ObjCARCAPElim::ID = 0;
 INITIALIZE_PASS(ObjCARCAPElim,
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index 94b092c..3893aab 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -57,9 +57,8 @@ ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AliasAnalysis::getAnalysisUsage(AU);
 }
 
-AliasAnalysis::AliasResult
-ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA,
-                            const MemoryLocation &LocB) {
+AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA,
+                                        const MemoryLocation &LocB) {
   if (!EnableARCOpts)
     return AliasAnalysis::alias(LocA, LocB);
 
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 080dbc0..baca76b 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -101,7 +101,7 @@ namespace {
       initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
     }
   };
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 //                               Implementation
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 4f2f7da..53c19c3 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -63,7 +63,7 @@ namespace {
       initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
     }
   };
-} // namespace
+}
 
 char ObjCARCExpand::ID = 0;
 INITIALIZE_PASS(ObjCARCExpand,
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index cdbbfac..9edbb17 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -313,7 +313,7 @@ namespace {
   };
 
   const unsigned BBState::OverflowOccurredValue = 0xffffffff;
-} // namespace
+}
 
 namespace llvm {
 raw_ostream &operator<<(raw_ostream &OS,
@@ -551,7 +551,7 @@ namespace {
       initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
     }
   };
-} // namespace
+}
 
 char ObjCARCOpt::ID = 0;
 INITIALIZE_PASS_BEGIN(ObjCARCOpt,
@@ -1846,7 +1846,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
         Value *Arg = Call->getArgOperand(0);
         Value *EarlierArg = EarlierCall->getArgOperand(0);
         switch (PA.getAA()->alias(Arg, EarlierArg)) {
-        case AliasAnalysis::MustAlias:
+        case MustAlias:
           Changed = true;
           // If the load has a builtin retain, insert a plain retain for it.
           if (Class == ARCInstKind::LoadWeakRetained) {
@@ -1858,10 +1858,10 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
           Call->replaceAllUsesWith(EarlierCall);
           Call->eraseFromParent();
           goto clobbered;
-        case AliasAnalysis::MayAlias:
-        case AliasAnalysis::PartialAlias:
+        case MayAlias:
+        case PartialAlias:
           goto clobbered;
-        case AliasAnalysis::NoAlias:
+        case NoAlias:
           break;
         }
         break;
@@ -1875,7 +1875,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
         Value *Arg = Call->getArgOperand(0);
         Value *EarlierArg = EarlierCall->getArgOperand(0);
         switch (PA.getAA()->alias(Arg, EarlierArg)) {
-        case AliasAnalysis::MustAlias:
+        case MustAlias:
           Changed = true;
           // If the load has a builtin retain, insert a plain retain for it.
           if (Class == ARCInstKind::LoadWeakRetained) {
@@ -1887,10 +1887,10 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
           Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
           Call->eraseFromParent();
           goto clobbered;
-        case AliasAnalysis::MayAlias:
-        case AliasAnalysis::PartialAlias:
+        case MayAlias:
+        case PartialAlias:
           goto clobbered;
-        case AliasAnalysis::NoAlias:
+        case NoAlias:
           break;
         }
         break;
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 8346345..9ffdfb4 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -116,12 +116,12 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B,
 
   // Ask regular AliasAnalysis, for a first approximation.
   switch (AA->alias(A, B)) {
-  case AliasAnalysis::NoAlias:
+  case NoAlias:
     return false;
-  case AliasAnalysis::MustAlias:
-  case AliasAnalysis::PartialAlias:
+  case MustAlias:
+  case PartialAlias:
     return true;
-  case AliasAnalysis::MayAlias:
+  case MayAlias:
     break;
   }
 
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index fe0224b..d6fc916 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -44,7 +44,7 @@ struct ADCE : public FunctionPass {
     AU.setPreservesCFG();
   }
 };
-} // namespace
+}
 
 char ADCE::ID = 0;
 INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index a4e5446..8918909 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -76,7 +76,7 @@ struct AlignmentFromAssumptions : public FunctionPass {
                             const SCEV *&OffSCEV);
   bool processAssumption(CallInst *I);
 };
-} // namespace
+}
 
 char AlignmentFromAssumptions::ID = 0;
 static const char aip_name[] = "Alignment from assumptions";
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index 8ffbacd..09c605e 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -66,7 +66,7 @@ struct BDCE : public FunctionPass {
   AssumptionCache *AC;
   DominatorTree *DT;
 };
-} // namespace
+}
 
 char BDCE::ID = 0;
 INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index cc1dc94..4288742 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -171,7 +171,7 @@ private:
   void deleteDeadCastInst() const;
   bool optimizeConstants(Function &Fn);
 };
-} // namespace
+}
 
 char ConstantHoisting::ID = 0;
 INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index e3df86e..c974ebb 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -47,7 +47,7 @@ namespace {
       AU.addRequired<TargetLibraryInfoWrapperPass>();
     }
   };
-} // namespace
+}
 
 char ConstantPropagation::ID = 0;
 INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index b1809b7..79624b2 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -56,7 +56,7 @@ namespace {
       AU.addRequired<LazyValueInfo>();
     }
   };
-} // namespace
+}
 
 char CorrelatedValuePropagation::ID = 0;
 INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index aa628e5..3b262a2 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -60,7 +60,7 @@ namespace {
       AU.setPreservesCFG();
     }
   };
-} // namespace
+}
 
 char DeadInstElimination::ID = 0;
 INITIALIZE_PASS(DeadInstElimination, "die",
@@ -87,7 +87,7 @@ namespace {
       AU.setPreservesCFG();
     }
  };
-} // namespace
+}
 
 char DCE::ID = 0;
 INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c99dc5f..c505584 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -92,7 +92,7 @@ namespace {
       AU.addPreserved<MemoryDependenceAnalysis>();
     }
   };
-} // namespace
+}
 
 char DSE::ID = 0;
 INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 8b629ea..d536a93 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -72,7 +72,7 @@ struct SimpleValue {
            isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
   }
 };
-} // namespace
+}
 
 namespace llvm {
 template <> struct DenseMapInfo<SimpleValue> {
@@ -85,7 +85,7 @@ template <> struct DenseMapInfo<SimpleValue> {
   static unsigned getHashValue(SimpleValue Val);
   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
 };
-} // namespace llvm
+}
 
 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
   Instruction *Inst = Val.Inst;
@@ -219,7 +219,7 @@ struct CallValue {
     return true;
   }
 };
-} // namespace
+}
 
 namespace llvm {
 template <> struct DenseMapInfo<CallValue> {
@@ -232,7 +232,7 @@ template <> struct DenseMapInfo<CallValue> {
   static unsigned getHashValue(CallValue Val);
   static bool isEqual(CallValue LHS, CallValue RHS);
 };
-} // namespace llvm
+}
 
 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
   Instruction *Inst = Val.Inst;
@@ -447,7 +447,7 @@ private:
                                                  ExpectedType);
   }
 };
-} // namespace
+}
 
 bool EarlyCSE::processNode(DomTreeNode *Node) {
   BasicBlock *BB = Node->getBlock();
@@ -764,7 +764,7 @@ public:
     AU.setPreservesCFG();
   }
 };
-} // namespace
+}
 
 char EarlyCSELegacyPass::ID = 0;
 
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index dd6ea8d..0430c18 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -36,7 +36,7 @@ public:
 private:
   AliasAnalysis *AA;
 };
-} // namespace
+}
 
 char FlattenCFGPass::ID = 0;
 INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false,
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index bb90c5f..c931422 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -79,7 +79,7 @@ namespace {
     MapVector<Instruction*, Value*> ConvertedInsts;
     LLVMContext *Ctx;
   };
-} // namespace
+}
 
 char Float2Int::ID = 0;
 INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index d9308c4..60903c8 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -138,7 +138,7 @@ namespace {
     uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
     void verifyRemoved(const Value *) const;
   };
-} // namespace
+}
 
 namespace llvm {
 template <> struct DenseMapInfo<Expression> {
@@ -159,7 +159,7 @@ template <> struct DenseMapInfo<Expression> {
   }
 };
 
-} // namespace llvm
+}
 
 //===----------------------------------------------------------------------===//
 //                     ValueTable Internal Functions
@@ -723,7 +723,7 @@ namespace {
   };
 
   char GVN::ID = 0;
-} // namespace
+}
 
 // The public interface to this file...
 FunctionPass *llvm::createGVNPass(bool NoLoads) {
@@ -1783,13 +1783,9 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
   // being replaced.
   BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
   BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
-  if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) &&
-      isa<OverflowingBinaryOperator>(ReplOp)) {
-    if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap())
-      ReplOp->setHasNoSignedWrap(false);
-    if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap())
-      ReplOp->setHasNoUnsignedWrap(false);
-  }
+  if (Op && ReplOp)
+    ReplOp->andIRFlags(Op);
+
   if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
     // FIXME: If both the original and replacement value are part of the
     // same control-flow region (meaning that the execution of one
@@ -2808,6 +2804,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
   if (!BI || BI->isUnconditional())
     return false;
 
+  // If a branch has two identical successors, we cannot declare either dead.
+  if (BI->getSuccessor(0) == BI->getSuccessor(1))
+    return false;
+
   ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
   if (!Cond)
     return false;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index e931382..6f03754 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -136,7 +136,7 @@ namespace {
 
     void SinkUnusedInvariants(Loop *L);
   };
-} // namespace
+}
 
 char IndVarSimplify::ID = 0;
 INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
@@ -494,7 +494,7 @@ struct RewritePhi {
   RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S)
       : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {}
 };
-} // namespace
+}
 
 //===----------------------------------------------------------------------===//
 // RewriteLoopExitValues - Optimize IV users outside the loop.
@@ -758,7 +758,7 @@ namespace {
     WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
                    IsSigned(false) {}
   };
-} // namespace
+}
 
 /// visitCast - Update information about the induction variable that is
 /// extended by this sign or zero extend operation. This is used to determine
@@ -1321,7 +1321,7 @@ namespace {
     // Implement the interface used by simplifyUsersOfIV.
     void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
   };
-} // namespace
+}
 
 /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
 /// users. Each successive simplification may push more users which may
@@ -2013,11 +2013,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 
   // Now that we're done iterating through lists, clean up any instructions
   // which are now dead.
-  while (!DeadInsts.empty()) {
-    Value *V = static_cast<Value *>(DeadInsts.pop_back_val());
-    if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
+  while (!DeadInsts.empty())
+    if (Instruction *Inst =
+            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
       RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
-  }
 
   // The Rewriter may not be used from this point on.
 
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index ce1a0ca..cbdacad 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -222,7 +222,7 @@ public:
 };
 
 char InductiveRangeCheckElimination::ID = 0;
-} // namespace
+}
 
 INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
                 "Inductive range check elimination", false, false)
@@ -618,7 +618,7 @@ public:
   bool run();
 };
 
-} // namespace
+}
 
 void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
                                       BasicBlock *ReplaceBy) {
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 7316db6..1130d22 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -138,7 +138,7 @@ namespace {
     bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
     bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
   };
-} // namespace
+}
 
 char JumpThreading::ID = 0;
 INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index e501946..f0e6d64 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -156,7 +156,7 @@ namespace {
     /// Simple Analysis hook. Delete loop L from alias set map.
     void deleteAnalysisLoop(Loop *L) override;
   };
-} // namespace
+}
 
 char LICM::ID = 0;
 INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -777,7 +777,7 @@ namespace {
       AST.deleteValue(I);
     }
   };
-} // namespace
+} // end anon namespace
 
 /// Try to promote memory values to scalars by sinking stores out of the
 /// loop and moving loads to before the loop.  We do this by looping over
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 3dbf6ac..c19cd19 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -77,7 +77,7 @@ private:
   bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
   bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
 };
-} // namespace
+}
 
 bool LoadCombine::doInitialization(Function &F) {
   DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 02760ff..98b068e 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -57,7 +57,7 @@ namespace {
                     bool &Changed, BasicBlock *Preheader);
 
   };
-} // namespace
+}
 
 char LoopDeletion::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index d21a7db..0325d26 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -635,10 +635,11 @@ public:
   LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
                  DominatorTree *DT,
                  const SmallVector<int, 8> *PtrToPartition = nullptr)
-      : OrigLoop(L), NonDistributedLoop(nullptr),
+      : VersionedLoop(L), NonVersionedLoop(nullptr),
         PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {}
 
-  /// \brief Returns true if we need memchecks to distribute the loop.
+  /// \brief Returns true if we need memchecks to disambiguate may-aliasing
+  /// accesses.
   bool needsRuntimeChecks() const {
     return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition);
   }
@@ -649,49 +650,51 @@ public:
     Instruction *FirstCheckInst;
     Instruction *MemRuntimeCheck;
     // Add the memcheck in the original preheader (this is empty initially).
-    BasicBlock *MemCheckBB = OrigLoop->getLoopPreheader();
+    BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader();
     std::tie(FirstCheckInst, MemRuntimeCheck) =
         LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition);
     assert(MemRuntimeCheck && "called even though needsAnyChecking = false");
 
     // Rename the block to make the IR more readable.
-    MemCheckBB->setName(OrigLoop->getHeader()->getName() + ".ldist.memcheck");
+    MemCheckBB->setName(VersionedLoop->getHeader()->getName() +
+                        ".lver.memcheck");
 
     // Create empty preheader for the loop (and after cloning for the
-    // original/nondist loop).
+    // non-versioned loop).
     BasicBlock *PH =
         SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
-    PH->setName(OrigLoop->getHeader()->getName() + ".ph");
+    PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
 
     // Clone the loop including the preheader.
     //
     // FIXME: This does not currently preserve SimplifyLoop because the exit
     // block is a join between the two loops.
-    SmallVector<BasicBlock *, 8> NonDistributedLoopBlocks;
-    NonDistributedLoop =
-        cloneLoopWithPreheader(PH, MemCheckBB, OrigLoop, VMap, ".ldist.nondist",
-                               LI, DT, NonDistributedLoopBlocks);
-    remapInstructionsInLoop(NonDistributedLoopBlocks, VMap);
+    SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
+    NonVersionedLoop =
+        cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap,
+                               ".lver.orig", LI, DT, NonVersionedLoopBlocks);
+    remapInstructionsInLoop(NonVersionedLoopBlocks, VMap);
 
     // Insert the conditional branch based on the result of the memchecks.
     Instruction *OrigTerm = MemCheckBB->getTerminator();
-    BranchInst::Create(NonDistributedLoop->getLoopPreheader(),
-                       OrigLoop->getLoopPreheader(), MemRuntimeCheck, OrigTerm);
+    BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
+                       VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
+                       OrigTerm);
     OrigTerm->eraseFromParent();
 
     // The loops merge in the original exit block.  This is now dominated by the
     // memchecking block.
-    DT->changeImmediateDominator(OrigLoop->getExitBlock(), MemCheckBB);
+    DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
   }
 
   /// \brief Adds the necessary PHI nodes for the versioned loops based on the
   /// loop-defined values used outside of the loop.
   void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) {
-    BasicBlock *PHIBlock = OrigLoop->getExitBlock();
+    BasicBlock *PHIBlock = VersionedLoop->getExitBlock();
     assert(PHIBlock && "No single successor to loop exit block");
 
     for (auto *Inst : DefsUsedOutside) {
-      auto *NonDistInst = cast<Instruction>(VMap[Inst]);
+      auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]);
       PHINode *PN;
 
       // First see if we have a single-operand PHI with the value defined by the
@@ -704,24 +707,25 @@ public:
       }
       // If not create it.
       if (!PN) {
-        PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".ldist",
+        PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
                              PHIBlock->begin());
         for (auto *User : Inst->users())
-          if (!OrigLoop->contains(cast<Instruction>(User)->getParent()))
+          if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
             User->replaceUsesOfWith(Inst, PN);
-        PN->addIncoming(Inst, OrigLoop->getExitingBlock());
+        PN->addIncoming(Inst, VersionedLoop->getExitingBlock());
       }
-      // Add the new incoming value from the non-distributed loop.
-      PN->addIncoming(NonDistInst, NonDistributedLoop->getExitingBlock());
+      // Add the new incoming value from the non-versioned loop.
+      PN->addIncoming(NonVersionedLoopInst,
+                      NonVersionedLoop->getExitingBlock());
     }
   }
 
 private:
   /// \brief The original loop.  This becomes the "versioned" one, i.e. control
   /// goes if the memchecks all pass.
-  Loop *OrigLoop;
+  Loop *VersionedLoop;
   /// \brief The fall-back loop, i.e. if any of the memchecks fail.
-  Loop *NonDistributedLoop;
+  Loop *NonVersionedLoop;
 
   /// \brief For each memory pointer it contains the partitionId it is used in.
   /// If nullptr, no partitioning is used.
@@ -730,8 +734,8 @@ private:
   /// If the pointer is used in multiple partitions the entry is set to -1.
   const SmallVector<int, 8> *PtrToPartition;
 
-  /// \brief This maps the instructions from OrigLoop to their counterpart in
-  /// NonDistributedLoop.
+  /// \brief This maps the instructions from VersionedLoop to their counterpart
+  /// in NonVersionedLoop.
   ValueToValueMapTy VMap;
 
   /// \brief Analyses used.
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3de1333..714ce91 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -209,7 +209,7 @@ namespace {
     bool runOnNoncountableLoop();
     bool runOnCountableLoop();
   };
-} // namespace
+}
 
 char LoopIdiomRecognize::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 4c40f24..e125026 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -52,7 +52,7 @@ namespace {
       AU.addRequired<TargetLibraryInfoWrapperPass>();
     }
   };
-} // namespace
+}
 
 char LoopInstSimplify::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index f6db9b1..ed103e6 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -438,7 +438,7 @@ namespace {
     bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
                 ReductionTracker &Reductions);
   };
-} // namespace
+}
 
 char LoopReroll::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 2ba70ad..a675e12 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -79,7 +79,7 @@ namespace {
     AssumptionCache *AC;
     DominatorTree *DT;
   };
-} // namespace
+}
 
 char LoopRotate::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ee72486..4b59f3d 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -116,7 +116,7 @@ public:
   void dump() const;
 };
 
-} // namespace
+}
 
 void RegSortData::print(raw_ostream &OS) const {
   OS << "[NumUses=" << UsedByIndices.count() << ']';
@@ -157,7 +157,7 @@ public:
   const_iterator end() const   { return RegSequence.end(); }
 };
 
-} // namespace
+}
 
 void
 RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
@@ -281,7 +281,7 @@ struct Formula {
   void dump() const;
 };
 
-} // namespace
+}
 
 /// DoInitialMatch - Recursion helper for InitialMatch.
 static void DoInitialMatch(const SCEV *S, Loop *L,
@@ -903,7 +903,7 @@ private:
                            SmallPtrSetImpl<const SCEV *> *LoserRegs);
 };
 
-} // namespace
+}
 
 /// RateRegister - Tally up interesting quantities from the given register.
 void Cost::RateRegister(const SCEV *Reg,
@@ -1102,7 +1102,7 @@ struct LSRFixup {
   void dump() const;
 };
 
-} // namespace
+}
 
 LSRFixup::LSRFixup()
   : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
@@ -1252,7 +1252,7 @@ public:
   void dump() const;
 };
 
-} // namespace
+}
 
 /// HasFormula - Test whether this use as a formula which has the same
 /// registers as the given formula.
@@ -1791,7 +1791,7 @@ public:
   void dump() const;
 };
 
-} // namespace
+}
 
 /// OptimizeShadowIV - If IV is used in a int-to-float cast
 /// inside the loop then try to eliminate the cast operation.
@@ -3644,7 +3644,7 @@ struct WorkItem {
   void dump() const;
 };
 
-} // namespace
+}
 
 void WorkItem::print(raw_ostream &OS) const {
   OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
@@ -4949,7 +4949,7 @@ private:
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 };
 
-} // namespace
+}
 
 char LoopStrengthReduce::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d702dc0..9e7558d 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -229,7 +229,7 @@ namespace {
                              unsigned DynamicCostSavingsDiscount,
                              uint64_t UnrolledCost, uint64_t RolledDynamicCost);
   };
-} // namespace
+}
 
 char LoopUnroll::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 5bdc2ec..cbc563b 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -43,6 +43,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -80,6 +81,7 @@ namespace {
 
     struct LoopProperties {
       unsigned CanBeUnswitchedCount;
+      unsigned WasUnswitchedCount;
       unsigned SizeEstimation;
       UnswitchedValsMap UnswitchedVals;
     };
@@ -93,37 +95,52 @@ namespace {
     UnswitchedValsMap *CurLoopInstructions;
     LoopProperties *CurrentLoopProperties;
 
-    // Max size of code we can produce on remained iterations.
+    // A loop unswitching with an estimated cost above this threshold
+    // is not performed. MaxSize is turned into unswitching quota for
+    // the current loop, and reduced correspondingly, though note that
+    // the quota is returned by releaseMemory() when the loop has been
+    // processed, so that MaxSize will return to its previous
+    // value. So in most cases MaxSize will equal the Threshold flag
+    // when a new loop is processed. An exception to that is that
+    // MaxSize will have a smaller value while processing nested loops
+    // that were introduced due to loop unswitching of an outer loop.
+    //
+    // FIXME: The way that MaxSize works is subtle and depends on the
+    // pass manager processing loops and calling releaseMemory() in a
+    // specific order. It would be good to find a more straightforward
+    // way of doing what MaxSize does.
     unsigned MaxSize;
 
-    public:
-
-      LUAnalysisCache() :
-        CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
-        MaxSize(Threshold)
-      {}
-
-      // Analyze loop. Check its size, calculate is it possible to unswitch
-      // it. Returns true if we can unswitch this loop.
-      bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
-                     AssumptionCache *AC);
-
-      // Clean all data related to given loop.
-      void forgetLoop(const Loop *L);
-
-      // Mark case value as unswitched.
-      // Since SI instruction can be partly unswitched, in order to avoid
-      // extra unswitching in cloned loops keep track all unswitched values.
-      void setUnswitched(const SwitchInst *SI, const Value *V);
-
-      // Check was this case value unswitched before or not.
-      bool isUnswitched(const SwitchInst *SI, const Value *V);
-
-      // Clone all loop-unswitch related loop properties.
-      // Redistribute unswitching quotas.
-      // Note, that new loop data is stored inside the VMap.
-      void cloneData(const Loop *NewLoop, const Loop *OldLoop,
-                     const ValueToValueMapTy &VMap);
+  public:
+    LUAnalysisCache()
+        : CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
+          MaxSize(Threshold) {}
+
+    // Analyze loop. Check its size, calculate is it possible to unswitch
+    // it. Returns true if we can unswitch this loop.
+    bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
+                   AssumptionCache *AC);
+
+    // Clean all data related to given loop.
+    void forgetLoop(const Loop *L);
+
+    // Mark case value as unswitched.
+    // Since SI instruction can be partly unswitched, in order to avoid
+    // extra unswitching in cloned loops keep track all unswitched values.
+    void setUnswitched(const SwitchInst *SI, const Value *V);
+
+    // Check was this case value unswitched before or not.
+    bool isUnswitched(const SwitchInst *SI, const Value *V);
+
+    // Returns true if another unswitching could be done within the cost
+    // threshold.
+    bool CostAllowsUnswitching();
+
+    // Clone all loop-unswitch related loop properties.
+    // Redistribute unswitching quotas.
+    // Note, that new loop data is stored inside the VMap.
+    void cloneData(const Loop *NewLoop, const Loop *OldLoop,
+                   const ValueToValueMapTy &VMap);
   };
 
   class LoopUnswitch : public LoopPass {
@@ -195,10 +212,12 @@ namespace {
     /// Update the appropriate Phi nodes as we do so.
     void SplitExitEdges(Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks);
 
-    bool UnswitchIfProfitable(Value *LoopCond, Constant *Val);
+    bool UnswitchIfProfitable(Value *LoopCond, Constant *Val,
+                              TerminatorInst *TI = nullptr);
     void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
-                                  BasicBlock *ExitBlock);
-    void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L);
+                                  BasicBlock *ExitBlock, TerminatorInst *TI);
+    void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
+                                     TerminatorInst *TI);
 
     void RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
                                               Constant *Val, bool isEqual);
@@ -206,14 +225,15 @@ namespace {
     void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
                                         BasicBlock *TrueDest,
                                         BasicBlock *FalseDest,
-                                        Instruction *InsertPt);
+                                        Instruction *InsertPt,
+                                        TerminatorInst *TI);
 
     void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
     bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
                                     BasicBlock **LoopExit = nullptr);
 
   };
-} // namespace
+}
 
 // Analyze loop. Check its size, calculate is it possible to unswitch
 // it. Returns true if we can unswitch this loop.
@@ -242,12 +262,13 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
     // consideration code simplification opportunities and code that can
     // be shared by the resultant unswitched loops.
     CodeMetrics Metrics;
-    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-         I != E; ++I)
+    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
+         ++I)
       Metrics.analyzeBasicBlock(*I, TTI, EphValues);
 
-    Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
+    Props.SizeEstimation = Metrics.NumInsts;
     Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
+    Props.WasUnswitchedCount = 0;
     MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
 
     if (Metrics.notDuplicatable) {
@@ -258,13 +279,6 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
     }
   }
 
-  if (!Props.CanBeUnswitchedCount) {
-    DEBUG(dbgs() << "NOT unswitching loop %"
-                 << L->getHeader()->getName() << ", cost too high: "
-                 << L->getBlocks().size() << "\n");
-    return false;
-  }
-
   // Be careful. This links are good only before new loop addition.
   CurrentLoopProperties = &Props;
   CurLoopInstructions = &Props.UnswitchedVals;
@@ -279,7 +293,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) {
 
   if (LIt != LoopsProperties.end()) {
     LoopProperties &Props = LIt->second;
-    MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation;
+    MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) *
+               Props.SizeEstimation;
     LoopsProperties.erase(LIt);
   }
 
@@ -299,6 +314,10 @@ bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) {
   return (*CurLoopInstructions)[SI].count(V);
 }
 
+bool LUAnalysisCache::CostAllowsUnswitching() {
+  return CurrentLoopProperties->CanBeUnswitchedCount > 0;
+}
+
 // Clone all loop-unswitch related loop properties.
 // Redistribute unswitching quotas.
 // Note, that new loop data is stored inside the VMap.
@@ -312,6 +331,8 @@ void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop,
   // Reallocate "can-be-unswitched quota"
 
   --OldLoopProps.CanBeUnswitchedCount;
+  ++OldLoopProps.WasUnswitchedCount;
+  NewLoopProps.WasUnswitchedCount = 0;
   unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
   NewLoopProps.CanBeUnswitchedCount = Quota / 2;
   OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
@@ -453,8 +474,8 @@ bool LoopUnswitch::processCurrentLoop() {
         // unswitch on it if we desire.
         Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
                                                currentLoop, Changed);
-        if (LoopCond && UnswitchIfProfitable(LoopCond,
-                                             ConstantInt::getTrue(Context))) {
+        if (LoopCond &&
+            UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
           ++NumBranches;
           return true;
         }
@@ -643,7 +664,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
 /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
 /// LoopCond == Val to simplify the loop.  If we decide that this is profitable,
 /// unswitch the loop, reprocess the pieces, then return true.
-bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
+bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
+                                        TerminatorInst *TI) {
   Function *F = loopHeader->getParent();
   Constant *CondVal = nullptr;
   BasicBlock *ExitBlock = nullptr;
@@ -651,17 +673,25 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
   if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
     // If the condition is trivial, always unswitch. There is no code growth
     // for this case.
-    UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock);
+    UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI);
     return true;
   }
 
   // Check to see if it would be profitable to unswitch current loop.
+  if (!BranchesInfo.CostAllowsUnswitching()) {
+    DEBUG(dbgs() << "NOT unswitching loop %"
+                 << currentLoop->getHeader()->getName()
+                 << " at non-trivial condition '" << *Val
+                 << "' == " << *LoopCond << "\n"
+                 << ". Cost too high.\n");
+    return false;
+  }
 
   // Do not do non-trivial unswitch while optimizing for size.
   if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize))
     return false;
 
-  UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
+  UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
   return true;
 }
 
@@ -685,25 +715,65 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
   return New;
 }
 
+static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
+                         bool Swapped) {
+  if (!SrcInst || !SrcInst->hasMetadata())
+    return;
+
+  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+  SrcInst->getAllMetadata(MDs);
+  for (auto &MD : MDs) {
+    switch (MD.first) {
+    default:
+      break;
+    case LLVMContext::MD_prof:
+      if (Swapped && MD.second->getNumOperands() == 3 &&
+          isa<MDString>(MD.second->getOperand(0))) {
+        MDString *MDName = cast<MDString>(MD.second->getOperand(0));
+        if (MDName->getString() == "branch_weights") {
+          auto *ValT = cast_or_null<ConstantAsMetadata>(
+                           MD.second->getOperand(1))->getValue();
+          auto *ValF = cast_or_null<ConstantAsMetadata>(
+                           MD.second->getOperand(2))->getValue();
+          assert(ValT && ValF && "Invalid Operands of branch_weights");
+          auto NewMD =
+              MDBuilder(DstInst->getParent()->getContext())
+                  .createBranchWeights(cast<ConstantInt>(ValF)->getZExtValue(),
+                                       cast<ConstantInt>(ValT)->getZExtValue());
+          MD.second = NewMD;
+        }
+      }
+      // fallthrough.
+    case LLVMContext::MD_dbg:
+      DstInst->setMetadata(MD.first, MD.second);
+    }
+  }
+}
+
 /// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values
 /// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest.  Insert the
 /// code immediately before InsertPt.
 void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
                                                   BasicBlock *TrueDest,
                                                   BasicBlock *FalseDest,
-                                                  Instruction *InsertPt) {
+                                                  Instruction *InsertPt,
+                                                  TerminatorInst *TI) {
   // Insert a conditional branch on LIC to the two preheaders.  The original
   // code is the true version and the new code is the false version.
   Value *BranchVal = LIC;
+  bool Swapped = false;
   if (!isa<ConstantInt>(Val) ||
       Val->getType() != Type::getInt1Ty(LIC->getContext()))
     BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val);
-  else if (Val != ConstantInt::getTrue(Val->getContext()))
+  else if (Val != ConstantInt::getTrue(Val->getContext())) {
     // We want to enter the new loop when the condition is true.
     std::swap(TrueDest, FalseDest);
+    Swapped = true;
+  }
 
   // Insert the new branch.
   BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+  copyMetadata(BI, TI, Swapped);
 
   // If either edge is critical, split it. This helps preserve LoopSimplify
   // form for enclosing loops.
@@ -717,13 +787,14 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
 /// where the path through the loop that doesn't execute its body has no
 /// side-effects), unswitch it.  This doesn't involve any code duplication, just
 /// moving the conditional branch outside of the loop and updating loop info.
-void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
-                                            Constant *Val,
-                                            BasicBlock *ExitBlock) {
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
+                                            BasicBlock *ExitBlock,
+                                            TerminatorInst *TI) {
   DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
-        << loopHeader->getName() << " [" << L->getBlocks().size()
-        << " blocks] in Function " << L->getHeader()->getParent()->getName()
-        << " on cond: " << *Val << " == " << *Cond << "\n");
+               << loopHeader->getName() << " [" << L->getBlocks().size()
+               << " blocks] in Function "
+               << L->getHeader()->getParent()->getName() << " on cond: " << *Val
+               << " == " << *Cond << "\n");
 
   // First step, split the preheader, so that we know that there is a safe place
   // to insert the conditional branch.  We will change loopPreheader to have a
@@ -744,7 +815,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
   // Okay, now we have a position to branch from and a position to branch to,
   // insert the new conditional branch.
   EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
-                                 loopPreheader->getTerminator());
+                                 loopPreheader->getTerminator(), TI);
   LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
   loopPreheader->getTerminator()->eraseFromParent();
 
@@ -780,7 +851,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
 /// to unswitch when LIC equal Val.  Split it into loop versions and test the
 /// condition outside of either loop.  Return the loops created as Out1/Out2.
 void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
-                                               Loop *L) {
+                                               Loop *L, TerminatorInst *TI) {
   Function *F = loopHeader->getParent();
   DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
         << loopHeader->getName() << " [" << L->getBlocks().size()
@@ -897,7 +968,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
          "Preheader splitting did not work correctly!");
 
   // Emit the new branch that selects between the two versions of this loop.
-  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
+  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
+                                 TI);
   LPM->deleteSimpleAnalysisValue(OldBR, L);
   OldBR->eraseFromParent();
 
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index b8b35d4..3314e1e 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -138,7 +138,7 @@ namespace {
       return Changed;
     }
   };
-} // namespace
+}
 
 char LowerAtomic::ID = 0;
 INITIALIZE_PASS(LowerAtomic, "loweratomic",
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index b845c03..0c47cbd 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -181,7 +181,7 @@ public:
 
   bool runOnFunction(Function &F) override { return lowerExpectIntrinsic(F); }
 };
-} // namespace
+}
 
 char LowerExpectIntrinsic::ID = 0;
 INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect",
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2c9f935..85012af 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -153,7 +153,7 @@ struct MemsetRange {
 
   bool isProfitableToUseMemset(const DataLayout &DL) const;
 };
-} // namespace
+} // end anon namespace
 
 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
   // If we found more than 4 stores to merge or 16 bytes, use memset.
@@ -237,7 +237,7 @@ public:
 
 };
 
-} // namespace
+} // end anon namespace
 
 
 /// addRange - Add a new store to the MemsetRanges data structure.  This adds a
@@ -355,7 +355,7 @@ namespace {
   };
 
   char MemCpyOpt::ID = 0;
-} // namespace
+}
 
 // createMemCpyOptPass - The public interface to this file...
 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 886b6f5..243db8d 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -156,7 +156,7 @@ private:
 };
 
 char MergedLoadStoreMotion::ID = 0;
-} // namespace
+}
 
 ///
 /// \brief createMergedLoadStoreMotionPass - The public interface to this file.
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 4cf68b0..f42f830 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -74,21 +74,18 @@
 // 1) We only considers n-ary adds for now. This should be extended and
 // generalized.
 //
-// 2) Besides arithmetic operations, similar reassociation can be applied to
-// GEPs. For example, if
-//   X = &arr[a]
-// dominates
-//   Y = &arr[a + b]
-// we may rewrite Y into X + b.
-//
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
@@ -115,6 +112,7 @@ public:
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<ScalarEvolution>();
     AU.addPreserved<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<ScalarEvolution>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
@@ -163,12 +161,18 @@ private:
   // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
   // to be an index of GEP.
   bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
+  // Returns whether V is known to be non-negative at context \c Ctxt.
+  bool isKnownNonNegative(Value *V, Instruction *Ctxt);
+  // Returns whether AO may sign overflow at context \c Ctxt. It computes a
+  // conservative result -- it answers true when not sure.
+  bool maySignOverflow(AddOperator *AO, Instruction *Ctxt);
 
+  AssumptionCache *AC;
+  const DataLayout *DL;
   DominatorTree *DT;
   ScalarEvolution *SE;
   TargetLibraryInfo *TLI;
   TargetTransformInfo *TTI;
-  const DataLayout *DL;
   // A lookup table quickly telling which instructions compute the given SCEV.
   // Note that there can be multiple instructions at different locations
   // computing to the same SCEV, so we map a SCEV to an instruction list.  For
@@ -185,6 +189,7 @@ private:
 char NaryReassociate::ID = 0;
 INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
                       false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
@@ -200,6 +205,7 @@ bool NaryReassociate::runOnFunction(Function &F) {
   if (skipOptnoneFunction(F))
     return false;
 
+  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   SE = &getAnalysis<ScalarEvolution>();
   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -346,18 +352,44 @@ bool NaryReassociate::requiresSignExtension(Value *Index,
   return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
 }
 
+bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) {
+  bool NonNegative, Negative;
+  // TODO: ComputeSignBits is expensive. Consider caching the results.
+  ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT);
+  return NonNegative;
+}
+
+bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) {
+  if (AO->hasNoSignedWrap())
+    return false;
+
+  Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
+  // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow.
+  // TODO: handle the negative case as well.
+  if (isKnownNonNegative(AO, Ctxt) &&
+      (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt)))
+    return false;
+
+  return true;
+}
+
 GetElementPtrInst *
 NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
                                           Type *IndexedType) {
   Value *IndexToSplit = GEP->getOperand(I + 1);
-  if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit))
+  if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
     IndexToSplit = SExt->getOperand(0);
+  } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
+    // zext can be treated as sext if the source is non-negative.
+    if (isKnownNonNegative(ZExt->getOperand(0), GEP))
+      IndexToSplit = ZExt->getOperand(0);
+  }
 
   if (AddOperator *AO = dyn_cast<AddOperator>(IndexToSplit)) {
     // If the I-th index needs sext and the underlying add is not equipped with
     // nsw, we cannot split the add because
     //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
-    if (requiresSignExtension(IndexToSplit, GEP) && !AO->hasNoSignedWrap())
+    if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP))
       return nullptr;
     Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
     // IndexToSplit = LHS + RHS.
@@ -373,10 +405,9 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
   return nullptr;
 }
 
-GetElementPtrInst *
-NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
-                                          Value *LHS, Value *RHS,
-                                          Type *IndexedType) {
+GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
+    GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS,
+    Type *IndexedType) {
   // Look for GEP's closest dominator that has the same SCEV as GEP except that
   // the I-th index is replaced with LHS.
   SmallVector<const SCEV *, 4> IndexExprs;
@@ -384,6 +415,16 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
     IndexExprs.push_back(SE->getSCEV(*Index));
   // Replace the I-th index with LHS.
   IndexExprs[I] = SE->getSCEV(LHS);
+  if (isKnownNonNegative(LHS, GEP) &&
+      DL->getTypeSizeInBits(LHS->getType()) <
+          DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
+    // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
+    // zext if the source operand is proved non-negative. We should do that
+    // consistently so that CandidateExpr more likely appears before. See
+    // @reassociate_gep_assume for an example of this canonicalization.
+    IndexExprs[I] =
+        SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
+  }
   const SCEV *CandidateExpr = SE->getGEPExpr(
       GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
       IndexExprs, GEP->isInBounds());
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 5423499..31d7df3 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -46,7 +46,7 @@ namespace {
   };
 
   char PartiallyInlineLibCalls::ID = 0;
-} // namespace
+}
 
 INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
                 "Partially inline calls to library functions", false, false)
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 670dcd2..9ecaf10 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -160,7 +160,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
     AU.setPreservesAll();
   }
 };
-} // namespace
+}
 
 static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
 static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
@@ -181,7 +181,7 @@ struct PlaceSafepoints : public FunctionPass {
     // if that was worth doing
   }
 };
-} // namespace
+}
 
 // Insert a safepoint poll immediately before the given instruction.  Does
 // not handle the parsability of state at the runtime call, that's the
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 9842fd7..d1acf78 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -154,7 +154,7 @@ namespace {
     unsigned SymbolicRank;
     bool isOr;
   };
-} // namespace
+}
 
 namespace {
   class Reassociate : public FunctionPass {
@@ -197,7 +197,7 @@ namespace {
     void OptimizeInst(Instruction *I);
     Instruction *canonicalizeNegConstExpr(Instruction *I);
   };
-} // namespace
+}
 
 XorOpnd::XorOpnd(Value *V) {
   assert(!isa<ConstantInt>(V) && "No ConstantInt");
@@ -936,6 +936,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
     // Push the negates through the add.
     I->setOperand(0, NegateValue(I->getOperand(0), BI));
     I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    if (I->getOpcode() == Instruction::Add) {
+      I->setHasNoUnsignedWrap(false);
+      I->setHasNoSignedWrap(false);
+    }
 
     // We must move the add instruction here, because the neg instructions do
     // not dominate the old add instruction in general.  By moving it, we are
@@ -976,6 +980,12 @@ static Value *NegateValue(Value *V, Instruction *BI) {
       InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
     }
     TheNeg->moveBefore(InsertPt);
+    if (TheNeg->getOpcode() == Instruction::Sub) {
+      TheNeg->setHasNoUnsignedWrap(false);
+      TheNeg->setHasNoSignedWrap(false);
+    } else {
+      TheNeg->andIRFlags(BI);
+    }
     return TheNeg;
   }
 
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 2ff56e6..1b46727 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -58,7 +58,7 @@ namespace {
 
     bool runOnFunction(Function &F) override;
   };
-} // namespace
+}
 
 char RegToMem::ID = 0;
 INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index c15bc1b..ae2ae3a 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -183,7 +183,7 @@ struct PartiallyConstructedSafepointRecord {
   /// Maps rematerialized copy to it's original value.
   RematerializedValueMapTy RematerializedValues;
 };
-} // namespace
+}
 
 /// Compute the live-in set for every basic block in the function
 static void computeLiveInValues(DominatorTree &DT, Function &F,
@@ -294,12 +294,17 @@ static void analyzeParsePointLiveness(
 
 static Value *findBaseDefiningValue(Value *I);
 
-/// If we can trivially determine that the index specified in the given vector
-/// is a base pointer, return it.  In cases where the entire vector is known to
-/// consist of base pointers, the entire vector will be returned.  This
-/// indicates that the relevant extractelement is a valid base pointer and
-/// should be used directly.
-static Value *findBaseOfVector(Value *I, Value *Index) {
+/// Return a base defining value for the 'Index' element of the given vector
+/// instruction 'I'.  If Index is null, returns a BDV for the entire vector
+/// 'I'.  As an optimization, this method will try to determine when the 
+/// element is known to already be a base pointer.  If this can be established,
+/// the second value in the returned pair will be true.  Note that either a
+/// vector or a pointer typed value can be returned.  For the former, the
+/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
+/// If the later, the return pointer is a BDV (or possibly a base) for the
+/// particular element in 'I'.  
+static std::pair<Value *, bool>
+findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
   assert(I->getType()->isVectorTy() &&
          cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
          "Illegal to ask for the base pointer of a non-pointer type");
@@ -309,7 +314,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
 
   if (isa<Argument>(I))
     // An incoming argument to the function is a base pointer
-    return I;
+    return std::make_pair(I, true);
 
   // We shouldn't see the address of a global as a vector value?
   assert(!isa<GlobalVariable>(I) &&
@@ -320,7 +325,7 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
   if (isa<UndefValue>(I))
     // utterly meaningless, but useful for dealing with partially optimized
     // code.
-    return I;
+    return std::make_pair(I, true);
 
   // Due to inheritance, this must be _after_ the global variable and undef
   // checks
@@ -328,38 +333,56 @@ static Value *findBaseOfVector(Value *I, Value *Index) {
     assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
            "order of checks wrong!");
     assert(Con->isNullValue() && "null is the only case which makes sense");
-    return Con;
+    return std::make_pair(Con, true);
   }
-
+  
   if (isa<LoadInst>(I))
-    return I;
-
+    return std::make_pair(I, true);
+  
   // For an insert element, we might be able to look through it if we know
-  // something about the indexes, but if the indices are arbitrary values, we
-  // can't without much more extensive scalarization.
+  // something about the indexes.
   if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
-    Value *InsertIndex = IEI->getOperand(2);
-    // This index is inserting the value, look for it's base
-    if (InsertIndex == Index)
-      return findBaseDefiningValue(IEI->getOperand(1));
-    // Both constant, and can't be equal per above. This insert is definitely
-    // not relevant, look back at the rest of the vector and keep trying.
-    if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
-      return findBaseOfVector(IEI->getOperand(0), Index);
-  }
-
-  // Note: This code is currently rather incomplete.  We are essentially only
-  // handling cases where the vector element is trivially a base pointer.  We
-  // need to update the entire base pointer construction algorithm to know how
-  // to track vector elements and potentially scalarize, but the case which
-  // would motivate the work hasn't shown up in real workloads yet.
-  llvm_unreachable("no base found for vector element");
+    if (Index) {
+      Value *InsertIndex = IEI->getOperand(2);
+      // This index is inserting the value, look for its BDV
+      if (InsertIndex == Index)
+        return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false);
+      // Both constant, and can't be equal per above. This insert is definitely
+      // not relevant, look back at the rest of the vector and keep trying.
+      if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
+        return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
+    }
+    
+    // We don't know whether this vector contains entirely base pointers or
+    // not.  To be conservatively correct, we treat it as a BDV and will
+    // duplicate code as needed to construct a parallel vector of bases.
+    return std::make_pair(IEI, false);
+  }
+
+  if (isa<ShuffleVectorInst>(I))
+    // We don't know whether this vector contains entirely base pointers or
+    // not.  To be conservatively correct, we treat it as a BDV and will
+    // duplicate code as needed to construct a parallel vector of bases.
+    // TODO: There a number of local optimizations which could be applied here
+    // for particular sufflevector patterns.
+    return std::make_pair(I, false);
+
+  // A PHI or Select is a base defining value.  The outer findBasePointer
+  // algorithm is responsible for constructing a base value for this BDV.
+  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
+         "unknown vector instruction - no base found for vector element");
+  return std::make_pair(I, false);
 }
 
+static bool isKnownBaseResult(Value *V);
+
 /// Helper function for findBasePointer - Will return a value which either a)
 /// defines the base pointer for the input or b) blocks the simple search
 /// (i.e. a PHI or Select of two derived pointers)
 static Value *findBaseDefiningValue(Value *I) {
+  if (I->getType()->isVectorTy())
+    return findBaseDefiningValueOfVector(I).first;
+  
   assert(I->getType()->isPointerTy() &&
          "Illegal to ask for the base pointer of a non-pointer type");
 
@@ -370,16 +393,39 @@ static Value *findBaseDefiningValue(Value *I) {
   if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
     Value *VectorOperand = EEI->getVectorOperand();
     Value *Index = EEI->getIndexOperand();
-    Value *VectorBase = findBaseOfVector(VectorOperand, Index);
-    // If the result returned is a vector, we know the entire vector must
-    // contain base pointers.  In that case, the extractelement is a valid base
-    // for this value.
-    if (VectorBase->getType()->isVectorTy())
-      return EEI;
-    // Otherwise, we needed to look through the vector to find the base for
-    // this particular element.
-    assert(VectorBase->getType()->isPointerTy());
-    return VectorBase;
+    std::pair<Value *, bool> pair =
+      findBaseDefiningValueOfVector(VectorOperand, Index);
+    Value *VectorBase = pair.first;
+    if (VectorBase->getType()->isPointerTy())
+      // We found a BDV for this specific element with the vector.  This is an
+      // optimization, but in practice it covers most of the useful cases
+      // created via scalarization.
+      return VectorBase;
+    else {
+      assert(VectorBase->getType()->isVectorTy());
+      if (pair.second)
+        // If the entire vector returned is known to be entirely base pointers,
+        // then the extractelement is valid base for this value.
+        return EEI;
+      else {
+        // Otherwise, we have an instruction which potentially produces a
+        // derived pointer and we need findBasePointers to clone code for us
+        // such that we can create an instruction which produces the
+        // accompanying base pointer.
+        // Note: This code is currently rather incomplete.  We don't currently
+        // support the general form of shufflevector of insertelement.
+        // Conceptually, these are just 'base defining values' of the same
+        // variety as phi or select instructions.  We need to update the
+        // findBasePointers algorithm to insert new 'base-only' versions of the
+        // original instructions. This is relative straight forward to do, but
+        // the case which would motivate the work hasn't shown up in real
+        // workloads yet.  
+        assert((isa<PHINode>(VectorBase) || isa<SelectInst>(VectorBase)) &&
+               "need to extend findBasePointers for generic vector"
+               "instruction cases");
+        return VectorBase;
+      }
+    }
   }
 
   if (isa<Argument>(I))
@@ -646,7 +692,7 @@ private:
     llvm_unreachable("only three states!");
   }
 };
-} // namespace
+}
 /// For a given value or instruction, figure out what base ptr it's derived
 /// from.  For gc objects, this is simply itself.  On success, returns a value
 /// which is the base pointer.  (This is reliable and can be used for
@@ -1712,7 +1758,9 @@ static void findLiveReferences(
 /// slightly non-trivial since it requires a format change.  Given how rare
 /// such cases are (for the moment?) scalarizing is an acceptable comprimise.
 static void splitVectorValues(Instruction *StatepointInst,
-                              StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+                              StatepointLiveSetTy &LiveSet,
+                              DenseMap<Value *, Value *>& PointerToBase,
+                              DominatorTree &DT) {
   SmallVector<Value *, 16> ToSplit;
   for (Value *V : LiveSet)
     if (isa<VectorType>(V->getType()))
@@ -1721,14 +1769,14 @@ static void splitVectorValues(Instruction *StatepointInst,
   if (ToSplit.empty())
     return;
 
+  DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;
+
   Function &F = *(StatepointInst->getParent()->getParent());
 
   DenseMap<Value *, AllocaInst *> AllocaMap;
   // First is normal return, second is exceptional return (invoke only)
   DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
   for (Value *V : ToSplit) {
-    LiveSet.erase(V);
-
     AllocaInst *Alloca =
         new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
     AllocaMap[V] = Alloca;
@@ -1738,7 +1786,7 @@ static void splitVectorValues(Instruction *StatepointInst,
     SmallVector<Value *, 16> Elements;
     for (unsigned i = 0; i < VT->getNumElements(); i++)
       Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
-    LiveSet.insert(Elements.begin(), Elements.end());
+    ElementMapping[V] = Elements;
 
     auto InsertVectorReform = [&](Instruction *IP) {
       Builder.SetInsertPoint(IP);
@@ -1771,6 +1819,7 @@ static void splitVectorValues(Instruction *StatepointInst,
       Replacements[V].second = InsertVectorReform(IP);
     }
   }
+
   for (Value *V : ToSplit) {
     AllocaInst *Alloca = AllocaMap[V];
 
@@ -1814,6 +1863,25 @@ static void splitVectorValues(Instruction *StatepointInst,
   for (Value *V : ToSplit)
     Allocas.push_back(AllocaMap[V]);
   PromoteMemToReg(Allocas, DT);
+
+  // Update our tracking of live pointers and base mappings to account for the
+  // changes we just made.
+  for (Value *V : ToSplit) {
+    auto &Elements = ElementMapping[V];
+
+    LiveSet.erase(V);
+    LiveSet.insert(Elements.begin(), Elements.end());
+    // We need to update the base mapping as well.
+    assert(PointerToBase.count(V));
+    Value *OldBase = PointerToBase[V];
+    auto &BaseElements = ElementMapping[OldBase];
+    PointerToBase.erase(V);
+    assert(Elements.size() == BaseElements.size());
+    for (unsigned i = 0; i < Elements.size(); i++) {
+      Value *Elem = Elements[i];
+      PointerToBase[Elem] = BaseElements[i];
+    }
+  }
 }
 
 // Helper function for the "rematerializeLiveValues". It walks use chain
@@ -2075,17 +2143,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
   // site.
   findLiveReferences(F, DT, P, toUpdate, records);
 
-  // Do a limited scalarization of any live at safepoint vector values which
-  // contain pointers.  This enables this pass to run after vectorization at
-  // the cost of some possible performance loss.  TODO: it would be nice to
-  // natively support vectors all the way through the backend so we don't need
-  // to scalarize here.
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    Instruction *statepoint = toUpdate[i].getInstruction();
-    splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
-  }
-
   // B) Find the base pointers for each live pointer
   /* scope for caching */ {
     // Cache the 'defining value' relation used in the computation and
@@ -2146,6 +2203,18 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
   }
   holders.clear();
 
+  // Do a limited scalarization of any live at safepoint vector values which
+  // contain pointers.  This enables this pass to run after vectorization at
+  // the cost of some possible performance loss.  TODO: it would be nice to
+  // natively support vectors all the way through the backend so we don't need
+  // to scalarize here.
+  for (size_t i = 0; i < records.size(); i++) {
+    struct PartiallyConstructedSafepointRecord &info = records[i];
+    Instruction *statepoint = toUpdate[i].getInstruction();
+    splitVectorValues(cast<Instruction>(statepoint), info.liveset,
+                      info.PointerToBase, DT);
+  }
+
   // In order to reduce live set of statepoint we might choose to rematerialize
   // some values instead of relocating them. This is purelly an optimization and
   // does not influence correctness.
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index bc068f7..305175f 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1055,7 +1055,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
 
   // load null -> null
   if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
-    return markConstant(IV, &I, Constant::getNullValue(I.getType()));
+    return markConstant(IV, &I, UndefValue::get(I.getType()));
 
   // Transform load (constant global) into the value loaded.
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f38b2b1..056dd11 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -127,7 +127,7 @@ typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
 typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
     IRBuilderTy;
 #endif
-} // namespace
+}
 
 namespace {
 /// \brief A used slice of an alloca.
@@ -595,7 +595,7 @@ private:
   /// the alloca.
   SmallVector<Use *, 8> DeadOperands;
 };
-} // namespace
+}
 
 static Value *foldSelectInst(SelectInst &SI) {
   // If the condition being selected on is a constant or the same value is
@@ -1173,7 +1173,7 @@ public:
     }
   }
 };
-} // namespace
+} // end anon namespace
 
 namespace {
 /// \brief An optimization pass providing Scalar Replacement of Aggregates.
@@ -1268,7 +1268,7 @@ private:
   void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
   bool promoteAllocas(Function &F);
 };
-} // namespace
+}
 
 char SROA::ID = 0;
 
@@ -3119,7 +3119,7 @@ private:
     return true;
   }
 };
-} // namespace
+}
 
 namespace {
 /// \brief Visitor to rewrite aggregate loads and stores as scalar.
@@ -3327,7 +3327,7 @@ private:
     return false;
   }
 };
-} // namespace
+}
 
 /// \brief Strip aggregate type wrapping.
 ///
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 69e3a67..c8dfa54 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -174,7 +174,7 @@ protected:
   /// \brief Flag indicating whether the profile input loaded successfully.
   bool ProfileIsValid;
 };
-} // namespace
+}
 
 /// \brief Print the weight of edge \p E on stream \p OS.
 ///
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e42c3da..d955da7 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -221,7 +221,7 @@ namespace {
     }
   };
 
-} // namespace
+}
 
 char SROA_DT::ID = 0;
 char SROA_SSAUp::ID = 0;
@@ -1123,7 +1123,7 @@ public:
     }
   }
 };
-} // namespace
+} // end anon namespace
 
 /// isSafeSelectToSpeculate - Select instructions that use an alloca and are
 /// subsequently loaded can be rewritten to load both input pointers and then
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 0733daf..231411a 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -48,8 +48,8 @@ UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1),
 
 STATISTIC(NumSimpl, "Number of blocks simplified");
 
-/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
-/// node) return blocks, merge them together to promote recursive block merging.
+/// If we have more than one empty (other than phi node) return blocks,
+/// merge them together to promote recursive block merging.
 static bool mergeEmptyReturnBlocks(Function &F) {
   bool Changed = false;
 
@@ -124,7 +124,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
   return Changed;
 }
 
-/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
 static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
                                    AssumptionCache *AC,
@@ -134,8 +134,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
   while (LocalChange) {
     LocalChange = false;
 
-    // Loop over all of the basic blocks and remove them if they are unneeded...
-    //
+    // Loop over all of the basic blocks and remove them if they are unneeded.
     for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
       if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
         LocalChange = true;
@@ -159,7 +158,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
   // iterativelySimplifyCFG can (rarely) make some loops dead.  If this happens,
   // removeUnreachableBlocks is needed to nuke them, which means we should
   // iterate between the two optimizations.  We structure the code like this to
-  // avoid reruning iterativelySimplifyCFG if the second pass of
+  // avoid rerunning iterativelySimplifyCFG if the second pass of
   // removeUnreachableBlocks doesn't do anything.
   if (!removeUnreachableBlocks(F))
     return true;
@@ -220,7 +219,7 @@ struct CFGSimplifyPass : public FunctionPass {
     AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 };
-} // namespace
+}
 
 char CFGSimplifyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index f32769c..6d9d417 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -224,11 +224,13 @@ FunctionPass *llvm::createStraightLineStrengthReducePass() {
 bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
                                             const Candidate &C) {
   return (Basis.Ins != C.Ins && // skip the same instruction
+          // They must have the same type too. Basis.Base == C.Base doesn't
+          // guarantee their types are the same (PR23975).
+          Basis.Ins->getType() == C.Ins->getType() &&
           // Basis must dominate C in order to rewrite C with respect to Basis.
           DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
           // They share the same base, stride, and candidate kind.
-          Basis.Base == C.Base &&
-          Basis.Stride == C.Stride &&
+          Basis.Base == C.Base && Basis.Stride == C.Stride &&
           Basis.CandidateKind == C.CandidateKind);
 }
 
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index d23f515..c7de2e2 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -120,7 +120,7 @@ namespace {
     bool CanMoveAboveCall(Instruction *I, CallInst *CI);
     Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
   };
-} // namespace
+}
 
 char TailCallElim::ID = 0;
 INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
@@ -246,7 +246,7 @@ struct AllocaDerivedValueTracker {
   SmallPtrSet<Instruction *, 32> AllocaUsers;
   SmallPtrSet<Instruction *, 32> EscapePoints;
 };
-} // namespace
+}
 
 bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
   if (F.callsFunctionThatReturnsTwice())
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 72cdfa4..03c3a80 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -107,4 +107,4 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
   assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size());
 }
 
-} // namespace llvm
+} // llvm namespace
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 798376e..53471de 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -211,6 +211,11 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
   assert(I->getParent() == nullptr &&
          "ReplaceInstWithInst: Instruction already inserted into basic block!");
 
+  // Copy debug location to newly added instruction, if it wasn't already set
+  // by the caller.
+  if (!I->getDebugLoc())
+    I->setDebugLoc(BI->getDebugLoc());
+
   // Insert the new instruction into the basic block...
   BasicBlock::iterator New = BIL.insert(BI, I);
 
@@ -716,7 +721,6 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
   CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
   BranchInst *HeadNewTerm =
     BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
-  HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc());
   HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
 
@@ -766,7 +770,6 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
   (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc());
   BranchInst *HeadNewTerm =
     BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
-  HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc());
   HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
 }
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 362cd9b..7e83c9e 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -60,7 +60,7 @@ namespace {
       AU.addPreservedID(LoopSimplifyID);
     }
   };
-} // namespace
+}
 
 char BreakCriticalEdges::ID = 0;
 INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 0771b29..f2d5e07 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -42,7 +42,7 @@ namespace {
     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
       : Quotient(InQuotient), Remainder(InRemainder) {}
   };
-} // namespace
+}
 
 namespace llvm {
   template<>
@@ -69,7 +69,7 @@ namespace llvm {
   };
 
   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
-} // namespace llvm
+}
 
 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
 // value of the operands and uses a shorter-faster div/rem instruction when
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index e623445..4f8d1df 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -289,7 +289,7 @@ namespace {
                     BasicBlock::const_iterator StartingInst,
                     std::vector<const BasicBlock*> &ToClone);
   };
-} // namespace
+}
 
 /// The specified block is found to be reachable, clone it and
 /// anything that it can reach.
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 2693322..61f1811 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -99,7 +99,11 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
 
       SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.
       CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
+
     }
+
+    if (I->hasPersonalityFn())
+      F->setPersonalityFn(MapValue(I->getPersonalityFn(), VMap));
   }
 
   // And aliases
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
index 4bbded8..dc95089 100644
--- a/lib/Transforms/Utils/CtorUtils.cpp
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -162,4 +162,4 @@ bool optimizeGlobalCtorsList(Module &M,
   return true;
 }
 
-} // namespace llvm
+} // End llvm namespace
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 40a48c0..4eb3e3d 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -46,7 +46,7 @@ public:
   FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {}
   bool run(BasicBlock *BB);
 };
-} // namespace
+}
 
 /// If \param [in] BB has more than one predecessor that is a conditional
 /// branch, attempt to use parallel and/or for the branch condition. \returns
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index ea84e7c..d2d60d7 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -121,7 +121,7 @@ namespace {
       }
     }
   };
-} // namespace
+}
 
 /// Get or create a target for the branch from ResumeInsts.
 BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index c9bec9a..da890a2 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -50,7 +50,7 @@ namespace {
   };
   
   char InstNamer::ID = 0;
-} // namespace
+}
 
 INITIALIZE_PASS(InstNamer, "instnamer", 
                 "Assign names to anonymous instructions", false, false)
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index fcc7986..9d40b69 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -300,7 +300,7 @@ struct LCSSA : public FunctionPass {
     AU.addPreserved<ScalarEvolution>();
   }
 };
-} // namespace
+}
 
 char LCSSA::ID = 0;
 INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 8b0afa6..2e7d21c 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -386,8 +386,9 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
 
   // Create and insert the new backedge block...
   BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
-                                           Header->getName()+".backedge", F);
+                                           Header->getName() + ".backedge", F);
   BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+  BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
 
   DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
                << BEBlock->getName() << "\n");
@@ -776,7 +777,7 @@ namespace {
     /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
     void verifyAnalysis() const override;
   };
-} // namespace
+}
 
 char LoopSimplify::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 919b45d..add5432 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -86,7 +86,7 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
       if (L->contains(PN)) {
         NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
       } else {
-        NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+        NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
       }
 
       Value *V = PN->getIncomingValueForBlock(Latch);
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index c1b0645..4acd988 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -101,7 +101,7 @@ namespace {
       return CI1->getValue().slt(CI2->getValue());
     }
   };
-} // namespace
+}
 
 char LowerSwitch::ID = 0;
 INITIALIZE_PASS(LowerSwitch, "lowerswitch",
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 46dd65e..395a46b 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -131,7 +131,7 @@ namespace {
       return true;
     }
   };
-} // namespace
+}
 
 char MetaRenamer::ID = 0;
 INITIALIZE_PASS(MetaRenamer, "metarenamer", 
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index c0988987..88b39dd 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -303,7 +303,7 @@ public:
   }
 };
 
-} // namespace llvm
+} // End llvm namespace
 
 /// Check to see if AvailableVals has an entry for the specified BB and if so,
 /// return it.  If not, construct SSA form by first calculating the required
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 3d7ab0f..36781c1 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -136,11 +136,10 @@ public:
       : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
   bool run(BasicBlock *BB);
 };
-} // namespace
+}
 
-/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// Return true if it is safe to merge these two
 /// terminator instructions together.
-///
 static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
   if (SI1 == SI2) return false;  // Can't merge with self!
 
@@ -164,11 +163,9 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
   return true;
 }
 
-/// isProfitableToFoldUnconditional - Return true if it is safe and profitable
-/// to merge these two terminator instructions together, where SI1 is an
-/// unconditional branch. PhiNodes will store all PHI nodes in common
-/// successors.
-///
+/// Return true if it is safe and profitable to merge these two terminator
+/// instructions together, where SI1 is an unconditional branch. PhiNodes will
+/// store all PHI nodes in common successors.
 static bool isProfitableToFoldUnconditional(BranchInst *SI1,
                                           BranchInst *SI2,
                                           Instruction *Cond,
@@ -205,10 +202,10 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1,
   return true;
 }
 
-/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
-/// now be entries in it from the 'NewPred' block.  The values that will be
-/// flowing into the PHI nodes will be the same as those coming in from
-/// ExistPred, an existing predecessor of Succ.
+/// Update PHI nodes in Succ to indicate that there will now be entries in it
+/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
+/// will be the same as those coming in from ExistPred, an existing predecessor
+/// of Succ.
 static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
                                   BasicBlock *ExistPred) {
   if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
@@ -219,9 +216,9 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
     PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
 }
 
-/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
-/// given instruction, which is assumed to be safe to speculate. TCC_Free means
-/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively
+/// Compute an abstract "cost" of speculating the given instruction,
+/// which is assumed to be safe to speculate. TCC_Free means cheap,
+/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
 /// expensive.
 static unsigned ComputeSpeculationCost(const User *I,
                                        const TargetTransformInfo &TTI) {
@@ -229,8 +226,8 @@ static unsigned ComputeSpeculationCost(const User *I,
          "Instruction is not safe to speculatively execute!");
   return TTI.getUserCost(I);
 }
-/// DominatesMergePoint - If we have a merge point of an "if condition" as
-/// accepted above, return true if the specified value dominates the block.  We
+/// If we have a merge point of an "if condition" as accepted above,
+/// return true if the specified value dominates the block.  We
 /// don't handle the true generality of domination here, just a special case
 /// which works well enough for us.
 ///
@@ -302,7 +299,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
   return true;
 }
 
-/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
+/// Extract ConstantInt from value, looking through IntToPtr
 /// and PointerNullValue. Return NULL if value is not a constant int.
 static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
   // Normal constant int.
@@ -456,7 +453,7 @@ private:
 
   }
 
-  /// gather - Given a potentially 'or'd or 'and'd together collection of icmp
+  /// Given a potentially 'or'd or 'and'd together collection of icmp
   /// eq/ne/lt/gt instructions that compare a value against a constant, extract
   /// the value being compared, and stick the list constants into the Vals
   /// vector.
@@ -502,7 +499,7 @@ private:
   }
 };
 
-} // namespace
+}
 
 static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
   Instruction *Cond = nullptr;
@@ -519,7 +516,7 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
   if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond);
 }
 
-/// isValueEqualityComparison - Return true if the specified terminator checks
+/// Return true if the specified terminator checks
 /// to see if a value is equal to constant integer value.
 Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
   Value *CV = nullptr;
@@ -547,7 +544,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
   return CV;
 }
 
-/// GetValueEqualityComparisonCases - Given a value comparison instruction,
+/// Given a value comparison instruction,
 /// decode all of the 'cases' that it represents and return the 'default' block.
 BasicBlock *SimplifyCFGOpt::
 GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -571,15 +568,14 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
 }
 
 
-/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries
+/// Given a vector of bb/value pairs, remove any entries
 /// in the list that match the specified block.
 static void EliminateBlockCases(BasicBlock *BB,
                               std::vector<ValueEqualityComparisonCase> &Cases) {
   Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
 }
 
-/// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
-/// well.
+/// Return true if there are any keys in C1 that exist in C2 as well.
 static bool
 ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
               std::vector<ValueEqualityComparisonCase > &C2) {
@@ -613,12 +609,11 @@ ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
   return false;
 }
 
-/// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
-/// terminator instruction and its block is known to only have a single
-/// predecessor block, check to see if that predecessor is also a value
-/// comparison with the same value, and if that comparison determines the
-/// outcome of this comparison.  If so, simplify TI.  This does a very limited
-/// form of jump threading.
+/// If TI is known to be a terminator instruction and its block is known to
+/// only have a single predecessor block, check to see if that predecessor is
+/// also a value comparison with the same value, and if that comparison
+/// determines the outcome of this comparison. If so, simplify TI. This does a
+/// very limited form of jump threading.
 bool SimplifyCFGOpt::
 SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
                                               BasicBlock *Pred,
@@ -754,7 +749,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
 }
 
 namespace {
-  /// ConstantIntOrdering - This class implements a stable ordering of constant
+  /// This class implements a stable ordering of constant
   /// integers that does not depend on their address.  This is important for
   /// applications that sort ConstantInt's to ensure uniqueness.
   struct ConstantIntOrdering {
@@ -817,8 +812,8 @@ static void FitWeights(MutableArrayRef<uint64_t> Weights) {
   }
 }
 
-/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
-/// equality comparison instruction (either a switch or a branch on "X == c").
+/// The specified terminator is a value equality comparison instruction
+/// (either a switch or a branch on "X == c").
 /// See if any of the predecessors of the terminator block are value comparisons
 /// on the same value.  If so, and if safe to do so, fold them together.
 bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
@@ -1027,10 +1022,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
   return Changed;
 }
 
-// isSafeToHoistInvoke - If we would need to insert a select that uses the
-// value of this invoke (comments in HoistThenElseCodeToIf explain why we
-// would need to do this), we can't hoist the invoke, as there is nowhere
-// to put the select in this case.
+// If we would need to insert a select that uses the value of this invoke
+// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
+// can't hoist the invoke, as there is nowhere to put the select in this case.
 static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
                                 Instruction *I1, Instruction *I2) {
   for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
@@ -1049,9 +1043,9 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
 
 static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
 
-/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
-/// BB2, hoist any common code in the two blocks up into the branch block.  The
-/// caller of this function guarantees that BI's block dominates BB1 and BB2.
+/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
+/// in the two blocks up into the branch block. The caller of this function
+/// guarantees that BI's block dominates BB1 and BB2.
 static bool HoistThenElseCodeToIf(BranchInst *BI,
                                   const TargetTransformInfo &TTI) {
   // This does very trivial matching, with limited scanning, to find identical
@@ -1197,7 +1191,7 @@ HoistTerminator:
   return true;
 }
 
-/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd,
+/// Given an unconditional branch that goes to BBEnd,
 /// check whether BBEnd has only two predecessors and the other predecessor
 /// ends with an unconditional branch. If it is true, sink any common code
 /// in the two predecessors to BBEnd.
@@ -1656,8 +1650,7 @@ static bool HasNoDuplicateCall(const BasicBlock *BB) {
   return false;
 }
 
-/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
-/// across this block.
+/// Return true if we can thread a branch across this block.
 static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
   unsigned Size = 0;
@@ -1681,10 +1674,9 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
   return true;
 }
 
-/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
-/// that is defined in the same block as the branch and if any PHI entries are
-/// constants, thread edges corresponding to that entry to be branches to their
-/// ultimate destination.
+/// If we have a conditional branch on a PHI node value that is defined in the
+/// same block as the branch and if any PHI entries are constants, thread edges
+/// corresponding to that entry to be branches to their ultimate destination.
 static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
   BasicBlock *BB = BI->getParent();
   PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
@@ -1781,8 +1773,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
   return false;
 }
 
-/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
-/// PHI node, see if we can eliminate it.
+/// Given a BB that starts with the specified two-entry PHI node,
+/// see if we can eliminate it.
 static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                 const DataLayout &DL) {
   // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
@@ -1920,8 +1912,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
   return true;
 }
 
-/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
-/// to two returning blocks, try to merge them together into one return,
+/// If we found a conditional branch that goes to two returning blocks,
+/// try to merge them together into one return,
 /// introducing a select if the return values disagree.
 static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
                                            IRBuilder<> &Builder) {
@@ -2008,10 +2000,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
   return true;
 }
 
-/// ExtractBranchMetadata - Given a conditional BranchInstruction, retrieve the
-/// probabilities of the branch taking each edge. Fills in the two APInt
-/// parameters and return true, or returns false if no or invalid metadata was
-/// found.
+/// Given a conditional BranchInstruction, retrieve the probabilities of the
+/// branch taking each edge. Fills in the two APInt parameters and returns true,
+/// or returns false if no or invalid metadata was found.
 static bool ExtractBranchMetadata(BranchInst *BI,
                                   uint64_t &ProbTrue, uint64_t &ProbFalse) {
   assert(BI->isConditional() &&
@@ -2028,9 +2019,8 @@ static bool ExtractBranchMetadata(BranchInst *BI,
   return true;
 }
 
-/// checkCSEInPredecessor - Return true if the given instruction is available
+/// Return true if the given instruction is available
 /// in its predecessor block. If yes, the instruction will be removed.
-///
 static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
   if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
     return false;
@@ -2046,9 +2036,9 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
   return false;
 }
 
-/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
-/// predecessor branches to us and one of our successors, fold the block into
-/// the predecessor and use logical operations to pick the right destination.
+/// If this basic block is simple enough, and if a predecessor branches to us
+/// and one of our successors, fold the block into the predecessor and use
+/// logical operations to pick the right destination.
 bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
   BasicBlock *BB = BI->getParent();
 
@@ -2190,11 +2180,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
     }
 
     // If we have bonus instructions, clone them into the predecessor block.
-    // Note that there may be mutliple predecessor blocks, so we cannot move
+    // Note that there may be multiple predecessor blocks, so we cannot move
     // bonus instructions to a predecessor block.
     ValueToValueMapTy VMap; // maps original values to cloned values
     // We already make sure Cond is the last instruction before BI. Therefore,
-    // every instructions before Cond other than DbgInfoIntrinsic are bonus
+    // all instructions before Cond other than DbgInfoIntrinsic are bonus
     // instructions.
     for (auto BonusInst = BB->begin(); Cond != BonusInst; ++BonusInst) {
       if (isa<DbgInfoIntrinsic>(BonusInst))
@@ -2342,8 +2332,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
   return false;
 }
 
-/// SimplifyCondBranchToCondBranch - If we have a conditional branch as a
-/// predecessor of another block, this function tries to simplify it.  We know
+/// If we have a conditional branch as a predecessor of another block,
+/// this function tries to simplify it.  We know
 /// that PBI and BI are both conditional branches, and BI is in one of the
 /// successor blocks of PBI - PBI branches to BI.
 static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
@@ -2558,8 +2548,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   return true;
 }
 
-// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
-// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
+// true or to FalseBB if Cond is false.
 // Takes care of updating the successors and removing the old terminator.
 // Also makes sure not to introduce new successors by assuming that edges to
 // non-successor TrueBBs and FalseBBs aren't reachable.
@@ -2624,7 +2614,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
   return true;
 }
 
-// SimplifySwitchOnSelect - Replaces
+// Replaces
 //   (switch (select cond, X, Y)) on constant X, Y
 // with a branch - conditional if X and Y lead to distinct BBs,
 // unconditional otherwise.
@@ -2659,7 +2649,7 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
                                     TrueWeight, FalseWeight);
 }
 
-// SimplifyIndirectBrOnSelect - Replaces
+// Replaces
 //   (indirectbr (select cond, blockaddress(@fn, BlockA),
 //                             blockaddress(@fn, BlockB)))
 // with
@@ -2680,8 +2670,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
                                     0, 0);
 }
 
-/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
-/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// This is called when we find an icmp instruction
+/// (a seteq/setne with a constant) as the only instruction in a
 /// block that ends with an uncond branch.  We are looking for a very specific
 /// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified.  In
 /// this case, we merge the first two "or's of icmp" into a switch, but then the
@@ -2802,7 +2792,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
   return true;
 }
 
-/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// The specified branch is a conditional branch.
 /// Check to see if it is branching on an or/and chain of icmp instructions, and
 /// fold it into a switch instruction if so.
 static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
@@ -3239,7 +3229,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   return true;
 }
 
-/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
+/// Compute masked bits for the condition of a switch
 /// and use it to remove dead cases.
 static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
                                      const DataLayout &DL) {
@@ -3290,8 +3280,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
   return !DeadCases.empty();
 }
 
-/// FindPHIForConditionForwarding - If BB would be eligible for simplification
-/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// If BB would be eligible for simplification by
+/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
 /// by an unconditional branch), look at the phi node for BB in the successor
 /// block and see if the incoming value is equal to CaseValue. If so, return
 /// the phi node, and set PhiIndex to BB's index in the phi node.
@@ -3324,9 +3314,9 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
   return nullptr;
 }
 
-/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
-/// instruction to a phi node dominated by the switch, if that would mean that
-/// some of the destination blocks of the switch can be folded away.
+/// Try to forward the condition of a switch instruction to a phi node
+/// dominated by the switch, if that would mean that some of the destination
+/// blocks of the switch can be folded away.
 /// Returns true if a change is made.
 static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
@@ -3361,7 +3351,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   return Changed;
 }
 
-/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// Return true if the backend will be able to handle
 /// initializing an array of constants like C.
 static bool ValidLookupTableConstant(Constant *C) {
   if (C->isThreadDependent())
@@ -3379,7 +3369,7 @@ static bool ValidLookupTableConstant(Constant *C) {
       isa<UndefValue>(C);
 }
 
-/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up
+/// If V is a Constant, return it. Otherwise, try to look up
 /// its constant value in ConstantPool, returning 0 if it's not there.
 static Constant *LookupConstant(Value *V,
                          const SmallDenseMap<Value*, Constant*>& ConstantPool) {
@@ -3388,7 +3378,7 @@ static Constant *LookupConstant(Value *V,
   return ConstantPool.lookup(V);
 }
 
-/// ConstantFold - Try to fold instruction I into a constant. This works for
+/// Try to fold instruction I into a constant. This works for
 /// simple instructions such as binary operations where both operands are
 /// constant or can be replaced by constants from the ConstantPool. Returns the
 /// resulting constant on success, 0 otherwise.
@@ -3422,7 +3412,7 @@ ConstantFold(Instruction *I, const DataLayout &DL,
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
 }
 
-/// GetCaseResults - Try to determine the resulting constant values in phi nodes
+/// Try to determine the resulting constant values in phi nodes
 /// at the common destination basic block, *CommonDest, for one of the case
 /// destionations CaseDest corresponding to value CaseVal (0 for the default
 /// case), of a switch instruction SI.
@@ -3501,8 +3491,8 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
   return Res.size() > 0;
 }
 
-// MapCaseToResult - Helper function used to
-// add CaseVal to the list of cases that generate Result.
+// Helper function used to add CaseVal to the list of cases that generate
+// Result.
 static void MapCaseToResult(ConstantInt *CaseVal,
     SwitchCaseResultVectorTy &UniqueResults,
     Constant *Result) {
@@ -3516,7 +3506,7 @@ static void MapCaseToResult(ConstantInt *CaseVal,
         SmallVector<ConstantInt*, 4>(1, CaseVal)));
 }
 
-// InitializeUniqueCases - Helper function that initializes a map containing
+// Helper function that initializes a map containing
 // results for the PHI node of the common destination block for a switch
 // instruction. Returns false if multiple PHI nodes have been found or if
 // there is not a common destination block for the switch.
@@ -3561,9 +3551,8 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
   return true;
 }
 
-// ConvertTwoCaseSwitch - Helper function that checks if it is possible to
-// transform a switch with only two cases (or two cases + default)
-// that produces a result into a value select.
+// Helper function that checks if it is possible to transform a switch with only
+// two cases (or two cases + default) that produces a result into a select.
 // Example:
 // switch (a) {
 //   case 10:                %0 = icmp eq i32 %a, 10
@@ -3603,9 +3592,8 @@ ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
   return nullptr;
 }
 
-// RemoveSwitchAfterSelectConversion - Helper function to cleanup a switch
-// instruction that has been converted into a select, fixing up PHI nodes and
-// basic blocks.
+// Helper function to cleanup a switch instruction that has been converted into
+// a select, fixing up PHI nodes and basic blocks.
 static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
                                               Value *SelectValue,
                                               IRBuilder<> &Builder) {
@@ -3627,7 +3615,7 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
   SI->eraseFromParent();
 }
 
-/// SwitchToSelect - If the switch is only used to initialize one or more
+/// If the switch is only used to initialize one or more
 /// phi nodes in a common successor block with only two different
 /// constant values, replace the switch with select.
 static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
@@ -3659,23 +3647,21 @@ static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
 }
 
 namespace {
-  /// SwitchLookupTable - This class represents a lookup table that can be used
-  /// to replace a switch.
+  /// This class represents a lookup table that can be used to replace a switch.
   class SwitchLookupTable {
   public:
-    /// SwitchLookupTable - Create a lookup table to use as a switch replacement
-    /// with the contents of Values, using DefaultValue to fill any holes in the
-    /// table.
+    /// Create a lookup table to use as a switch replacement with the contents
+    /// of Values, using DefaultValue to fill any holes in the table.
     SwitchLookupTable(
         Module &M, uint64_t TableSize, ConstantInt *Offset,
         const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
         Constant *DefaultValue, const DataLayout &DL);
 
-    /// BuildLookup - Build instructions with Builder to retrieve the value at
+    /// Build instructions with Builder to retrieve the value at
     /// the position given by Index in the lookup table.
     Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
 
-    /// WouldFitInRegister - Return true if a table with TableSize elements of
+    /// Return true if a table with TableSize elements of
     /// type ElementType would fit in a target-legal register.
     static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                    const Type *ElementType);
@@ -3717,7 +3703,7 @@ namespace {
     // For ArrayKind, this is the array.
     GlobalVariable *Array;
   };
-} // namespace
+}
 
 SwitchLookupTable::SwitchLookupTable(
     Module &M, uint64_t TableSize, ConstantInt *Offset,
@@ -3907,9 +3893,8 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
   return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
 }
 
-/// ShouldBuildLookupTable - Determine whether a lookup table should be built
-/// for this switch, based on the number of cases, size of the table and the
-/// types of the results.
+/// Determine whether a lookup table should be built for this switch, based on
+/// the number of cases, size of the table, and the types of the results.
 static bool
 ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
                        const TargetTransformInfo &TTI, const DataLayout &DL,
@@ -4033,9 +4018,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
   }
 }
 
-/// SwitchToLookupTable - If the switch is only used to initialize one or more
-/// phi nodes in a common successor block with different constant values,
-/// replace the switch with lookup tables.
+/// If the switch is only used to initialize one or more phi nodes in a common
+/// successor block with different constant values, replace the switch with
+/// lookup tables.
 static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
                                 const DataLayout &DL,
                                 const TargetTransformInfo &TTI) {
@@ -4691,8 +4676,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
   return Changed;
 }
 
-/// SimplifyCFG - This function is used to do simplification of a CFG.  For
-/// example, it adjusts branches to branches to eliminate the extra hop, it
+/// This function is used to do simplification of a CFG.
+/// For example, it adjusts branches to branches to eliminate the extra hop,
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
 /// of the CFG.  It returns true if a modification was made.
 ///
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 68986ac..ab30aa1 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -77,7 +77,7 @@ namespace {
     Instruction *splitOverflowIntrinsic(Instruction *IVUser,
                                         const DominatorTree *DT);
   };
-} // namespace
+}
 
 /// Fold an IV operand into its use.  This removes increments of an
 /// aligned IV when used by a instruction that ignores the low bits.
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 0a583a5..c499c87 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -100,7 +100,7 @@ namespace {
       return Changed;
     }
   };
-} // namespace
+}
 
 char InstSimplifier::ID = 0;
 INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 4cc278f..a2a54da 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -538,7 +538,7 @@ void RewriteSymbols::loadAndParseMapFiles() {
   for (const auto &MapFile : MapFiles)
     parser.parse(MapFile, &Descriptors);
 }
-} // namespace
+}
 
 INITIALIZE_PASS(RewriteSymbols, "rewrite-symbols", "Rewrite Symbols", false,
                 false)
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index fd7661f..215d6f9 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -3192,7 +3192,7 @@ namespace {
 
     DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
   }
-} // namespace
+}
 
 char BBVectorize::ID = 0;
 static const char bb_vectorize_name[] = "Basic-Block Vectorization";
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index b7faa20..5ba1417 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -96,7 +96,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 #include <map>
@@ -850,6 +850,8 @@ public:
         return B.CreateAdd(StartValue, Index);
 
       case IK_PtrInduction:
+        assert(Index->getType() == StepValue->getType() &&
+               "Index type does not match StepValue type");
         if (StepValue->isMinusOne())
           Index = B.CreateNeg(Index);
         else if (!StepValue->isOne())
@@ -2413,9 +2415,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
          LoopVectorBody.push_back(NewIfBlock);
          VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
          Builder.SetInsertPoint(InsertPt);
-         Instruction *OldBr = IfBlock->getTerminator();
-         BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
-         OldBr->eraseFromParent();
+         ReplaceInstWithInst(IfBlock->getTerminator(),
+                             BranchInst::Create(CondBlock, NewIfBlock, Cmp));
          IfBlock = NewIfBlock;
       }
     }
@@ -2658,9 +2659,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
     LoopBypassBlocks.push_back(CheckBlock);
-    Instruction *OldTerm = LastBypassBlock->getTerminator();
-    BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
-    OldTerm->eraseFromParent();
+    ReplaceInstWithInst(
+        LastBypassBlock->getTerminator(),
+        BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow));
     LastBypassBlock = CheckBlock;
   }
 
@@ -2682,9 +2683,8 @@ void InnerLoopVectorizer::createEmptyLoop() {
 
     // Replace the branch into the memory check block with a conditional branch
     // for the "few elements case".
-    Instruction *OldTerm = LastBypassBlock->getTerminator();
-    BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
-    OldTerm->eraseFromParent();
+    ReplaceInstWithInst(LastBypassBlock->getTerminator(),
+                        BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
 
     Cmp = StrideCheck;
     LastBypassBlock = CheckBlock;
@@ -2707,17 +2707,15 @@ void InnerLoopVectorizer::createEmptyLoop() {
 
     // Replace the branch into the memory check block with a conditional branch
     // for the "few elements case".
-    Instruction *OldTerm = LastBypassBlock->getTerminator();
-    BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
-    OldTerm->eraseFromParent();
+    ReplaceInstWithInst(LastBypassBlock->getTerminator(),
+                        BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
 
     Cmp = MemRuntimeCheck;
     LastBypassBlock = CheckBlock;
   }
 
-  LastBypassBlock->getTerminator()->eraseFromParent();
-  BranchInst::Create(MiddleBlock, VectorPH, Cmp,
-                     LastBypassBlock);
+  ReplaceInstWithInst(LastBypassBlock->getTerminator(),
+                      BranchInst::Create(MiddleBlock, VectorPH, Cmp));
 
   // We are going to resume the execution of the scalar loop.
   // Go over all of the induction variables that we found and fix the
@@ -2798,7 +2796,10 @@ void InnerLoopVectorizer::createEmptyLoop() {
       break;
     }
     case LoopVectorizationLegality::IK_PtrInduction: {
-      EndValue = II.transform(BypassBuilder, CountRoundDown);
+      Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+                                                   II.StepValue->getType(),
+                                                   "cast.crd");
+      EndValue = II.transform(BypassBuilder, CRD);
       EndValue->setName("ptr.ind.end");
       break;
     }
@@ -2851,10 +2852,8 @@ void InnerLoopVectorizer::createEmptyLoop() {
   Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
                                 ResumeIndex, "cmp.n",
                                 MiddleBlock->getTerminator());
-
-  BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
-  // Remove the old terminator.
-  MiddleBlock->getTerminator()->eraseFromParent();
+  ReplaceInstWithInst(MiddleBlock->getTerminator(),
+                      BranchInst::Create(ExitBlock, ScalarPH, CmpN));
 
   // Create i+1 and fill the PHINode.
   Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
@@ -2906,7 +2905,7 @@ struct CSEDenseMapInfo {
     return LHS->isIdenticalTo(RHS);
   }
 };
-} // namespace
+}
 
 /// \brief Check whether this block is a predicated block.
 /// Due to if predication of stores we might create a sequence of "if(pred) a[i]
@@ -3448,12 +3447,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
       // This is the normalized GEP that starts counting at zero.
       Value *NormalizedIdx =
           Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
+      NormalizedIdx =
+          Builder.CreateSExtOrTrunc(NormalizedIdx, II.StepValue->getType());
       // This is the vector of results. Notice that we don't generate
       // vector geps because scalar geps result in better code.
       for (unsigned part = 0; part < UF; ++part) {
         if (VF == 1) {
           int EltIndex = part;
-          Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+          Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
           Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
           Value *SclrGep = II.transform(Builder, GlobalIdx);
           SclrGep->setName("next.gep");
@@ -3464,7 +3465,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
         Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
         for (unsigned int i = 0; i < VF; ++i) {
           int EltIndex = i + part * VF;
-          Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+          Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
           Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
           Value *SclrGep = II.transform(Builder, GlobalIdx);
           SclrGep->setName("next.gep");
@@ -4642,10 +4643,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
 
     if (VF == 0)
       VF = MaxVectorSize;
-
-    // If the trip count that we found modulo the vectorization factor is not
-    // zero then we require a tail.
-    if (VF < 2) {
+    else {
+      // If the trip count that we found modulo the vectorization factor is not
+      // zero then we require a tail.
       emitAnalysis(VectorizationReport() <<
                    "cannot optimize for size and vectorize at the "
                    "same time. Enable vectorization of this loop "
@@ -5507,9 +5507,8 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
         LoopVectorBody.push_back(NewIfBlock);
         VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
         Builder.SetInsertPoint(InsertPt);
-        Instruction *OldBr = IfBlock->getTerminator();
-        BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
-        OldBr->eraseFromParent();
+        ReplaceInstWithInst(IfBlock->getTerminator(),
+                            BranchInst::Create(CondBlock, NewIfBlock, Cmp));
         IfBlock = NewIfBlock;
       }
   }
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 370e295..7c4c279 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -43,7 +43,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include <algorithm>
 #include <map>
 #include <memory>
diff --git a/test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll b/test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll
new file mode 100644
index 0000000..01782e0
--- /dev/null
+++ b/test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -disable-output -passes=print-cg < %s 2>&1 | FileCheck %s
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
+define private void @f() {
+  ret void
+}
+
+define void @calls_statepoint(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK: Call edges in function: calls_statepoint
+; CHECK-NEXT:  -> f
+entry:
+  %cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
+  %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+  ret void
+}
+
+define void @calls_patchpoint() {
+; CHECK:  Call edges in function: calls_patchpoint
+; CHECK-NEXT:    -> f
+entry:
+  %c = bitcast void()* @f to i8*
+  tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %c, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
+  ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll b/test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll
new file mode 100644
index 0000000..0de1cd1
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/non-wrapping-pointer.ll
@@ -0,0 +1,41 @@
+; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s
+
+; For this loop:
+;   for (int i = 0; i < n; i++)
+;    A[2 * i] = A[2 * i] + B[i];
+;
+; , SCEV is unable to prove that A[2 * i] does not overflow.  However,
+; analyzing the IR helps us to conclude it and in turn allow dependence
+; analysis.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK: Memory dependences are safe{{$}}
+
+define void @f(i16* noalias %a,
+               i16* noalias %b, i64 %N) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+
+  %mul = mul nuw nsw i64 %ind, 2
+
+  %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %mul
+  %loadA = load i16, i16* %arrayidxA, align 2
+
+  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
+  %loadB = load i16, i16* %arrayidxB, align 2
+
+  %add = mul i16 %loadA, %loadB
+
+  store i16 %add, i16* %arrayidxA, align 2
+
+  %inc = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %inc, %N
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Assembler/dimodule.ll b/test/Assembler/dimodule.ll
new file mode 100644
index 0000000..994bc12
--- /dev/null
+++ b/test/Assembler/dimodule.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
+
+; CHECK: !named = !{!0, !1, !2, !1}
+!named = !{!0, !1, !2, !3}
+
+!0 = distinct !{}
+
+; CHECK: !1 = !DIModule(scope: !0, name: "Module")
+!1 = !DIModule(scope: !0, name: "Module")
+
+; CHECK: !2 = !DIModule(scope: !0, name: "Module", configMacros: "-DNDEBUG", includePath: "/usr/include", isysroot: "/")
+!2 = !DIModule(scope: !0, name: "Module", configMacros: "-DNDEBUG", includePath: "/usr/include", isysroot: "/")
+
+!3 = !DIModule(scope: !0, name: "Module", configMacros: "")
diff --git a/test/Bindings/llvm-c/ARM/disassemble.test b/test/Bindings/llvm-c/ARM/disassemble.test
new file mode 100644
index 0000000..ffa7ebf
--- /dev/null
+++ b/test/Bindings/llvm-c/ARM/disassemble.test
@@ -0,0 +1,21 @@
+; RUN: llvm-c-test --disassemble < %s | FileCheck %s
+
+armv8-linux-gnu     +crypto 02 00 81 e0 02 03 b0 f3
+;CHECK: triple: armv8-linux-gnu, features: +crypto
+;CHECK: 02 00 81 e0                  add r0, r1, r2
+;CHECK: 02 03 b0 f3                  aese.8 q0, q1
+
+armv8-linux-gnu     -crypto 02 00 81 e0 02 03 b0 f3
+;CHECK: triple: armv8-linux-gnu, features: -crypto
+;CHECK: 02 00 81 e0                  add r0, r1, r2
+;CHECK: 02                           ???
+;CHECK: 03                           ???
+;CHECK: b0                           ???
+;CHECK: f3                           ???
+
+arm-linux-android     NULL  44 26 1f e5 0c 10 4b e2 02 20 81 e0
+;CHECK: triple: arm-linux-android, features: NULL
+;CHECK: ldr	r2, [pc, #-1604]
+;CHECK: sub	r1, r11, #12
+;CHECK: 02 20 81 e0
+;CHECK: add	r2, r1, r2
diff --git a/test/Bindings/llvm-c/ARM/lit.local.cfg b/test/Bindings/llvm-c/ARM/lit.local.cfg
new file mode 100644
index 0000000..7c23e4f
--- /dev/null
+++ b/test/Bindings/llvm-c/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "ARM" in config.root.targets:
+    config.unsupported = True
diff --git a/test/Bindings/llvm-c/X86/disassemble.test b/test/Bindings/llvm-c/X86/disassemble.test
new file mode 100644
index 0000000..465b370
--- /dev/null
+++ b/test/Bindings/llvm-c/X86/disassemble.test
@@ -0,0 +1,23 @@
+; RUN: llvm-c-test --disassemble < %s | FileCheck %s
+
+x86_64-linux-unknown  NULL  48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3
+;CHECK: triple: x86_64-linux-unknown, features: NULL
+;CHECK: addq	$56, %rsp
+;CHECK: popq	%rbx
+;CHECK: popq	%rbp
+;CHECK: popq	%r12
+;CHECK: popq	%r13
+;CHECK: popq	%r14
+;CHECK: popq	%r15
+;CHECK: ret
+
+i686-apple-darwin     NULL  0f b7 4c 24 0a e8 29 ce ff ff
+;CHECK: triple: i686-apple-darwin, features: NULL
+;CHECK: movzwl	10(%esp), %ecx
+;CHECK: calll	-12759
+
+i686-linux-unknown    NULL  dd 44 24 04 d9 e1 c3
+;CHECK: triple: i686-linux-unknown, features: NULL
+;CHECK: fldl	4(%esp)
+;CHECK: fabs
+;CHECK: ret
diff --git a/test/Bindings/llvm-c/X86/lit.local.cfg b/test/Bindings/llvm-c/X86/lit.local.cfg
new file mode 100644
index 0000000..42bf50d
--- /dev/null
+++ b/test/Bindings/llvm-c/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "X86" in config.root.targets:
+    config.unsupported = True
diff --git a/test/Bindings/llvm-c/disassemble.test b/test/Bindings/llvm-c/disassemble.test
deleted file mode 100644
index bb7a9a0..0000000
--- a/test/Bindings/llvm-c/disassemble.test
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llvm-c-test --disassemble < %s | FileCheck %s
-
-armv8-linux-gnu     +crypto 02 00 81 e0 02 03 b0 f3
-;CHECK: triple: armv8-linux-gnu, features: +crypto
-;CHECK: 02 00 81 e0                  add r0, r1, r2
-;CHECK: 02 03 b0 f3                  aese.8 q0, q1
-
-armv8-linux-gnu     -crypto 02 00 81 e0 02 03 b0 f3
-;CHECK: triple: armv8-linux-gnu, features: -crypto
-;CHECK: 02 00 81 e0                  add r0, r1, r2
-;CHECK: 02                           ???
-;CHECK: 03                           ???
-;CHECK: b0                           ???
-;CHECK: f3                           ???
-
-arm-linux-android     NULL  44 26 1f e5 0c 10 4b e2 02 20 81 e0
-;CHECK: triple: arm-linux-android, features: NULL
-;CHECK: ldr	r2, [pc, #-1604]
-;CHECK: sub	r1, r11, #12
-;CHECK: 02 20 81 e0
-;CHECK: add	r2, r1, r2
-
-x86_64-linux-unknown  NULL  48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3
-;CHECK: triple: x86_64-linux-unknown, features: NULL
-;CHECK: addq	$56, %rsp
-;CHECK: popq	%rbx
-;CHECK: popq	%rbp
-;CHECK: popq	%r12
-;CHECK: popq	%r13
-;CHECK: popq	%r14
-;CHECK: popq	%r15
-;CHECK: ret
-
-i686-apple-darwin     NULL  0f b7 4c 24 0a e8 29 ce ff ff
-;CHECK: triple: i686-apple-darwin, features: NULL
-;CHECK: movzwl	10(%esp), %ecx
-;CHECK: calll	-12759
-
-i686-linux-unknown    NULL  dd 44 24 04 d9 e1 c3
-;CHECK: triple: i686-linux-unknown, features: NULL
-;CHECK: fldl	4(%esp)
-;CHECK: fabs
-;CHECK: ret
diff --git a/test/Bindings/llvm-c/lit.local.cfg b/test/Bindings/llvm-c/lit.local.cfg
deleted file mode 100644
index 75b22c0..0000000
--- a/test/Bindings/llvm-c/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-if not "X86" in config.root.targets:
-    config.unsupported = True
-if not "ARM" in config.root.targets:
-    config.unsupported = True
diff --git a/test/Bitcode/Inputs/PR23310.bc b/test/Bitcode/Inputs/PR23310.bc
new file mode 100644
index 0000000..cd1202f
Binary files /dev/null and b/test/Bitcode/Inputs/PR23310.bc differ
diff --git a/test/Bitcode/PR23310.test b/test/Bitcode/PR23310.test
new file mode 100644
index 0000000..6b79471
--- /dev/null
+++ b/test/Bitcode/PR23310.test
@@ -0,0 +1 @@
+RUN: llvm-dis -disable-output %p/Inputs/PR23310.bc
diff --git a/test/CodeGen/AArch64/aarch-multipart.ll b/test/CodeGen/AArch64/aarch-multipart.ll
new file mode 100644
index 0000000..fd42d6e
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch-multipart.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -o - | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-os"
+
+declare <4 x double> @user_func(<4 x double>) #1
+
+; Make sure we are not crashing on this code.
+; CHECK-LABEL: caller_function
+; CHECK: ret
+define void @caller_function(<4 x double>, <4 x double>, <4 x double>, <4 x double>, <4 x double>) #1 {
+entry:
+  %r = call <4 x double> @user_func(<4 x double> %4)
+  ret void
+}
+
+attributes #1 = { nounwind readnone }
+
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
new file mode 100644
index 0000000..ea3b8fa
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
@@ -0,0 +1,197 @@
+; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s
+
+; CHECK-LABEL: load_factor2:
+; CHECK: ld2 { v0.8b, v1.8b }, [x0]
+define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
+  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %add = add nsw <8 x i8> %strided.v0, %strided.v1
+  ret <8 x i8> %add
+}
+
+; CHECK-LABEL: load_factor3:
+; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
+define <4 x i32> @load_factor3(i32* %ptr) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
+  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %add = add nsw <4 x i32> %strided.v2, %strided.v1
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: load_factor4:
+; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+define <4 x i32> @load_factor4(i32* %ptr) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
+  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %add = add nsw <4 x i32> %strided.v0, %strided.v2
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: store_factor2:
+; CHECK: st2 { v0.8b, v1.8b }, [x0]
+define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
+  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_factor3:
+; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0]
+define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_factor4:
+; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
+  ret void
+}
+
+; The following cases test that interleaved access of pointer vectors can be
+; matched to ldN/stN instruction.
+
+; CHECK-LABEL: load_ptrvec_factor2:
+; CHECK: ld2 { v0.2d, v1.2d }, [x0]
+define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
+  %base = bitcast i32** %ptr to <4 x i32*>*
+  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
+  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32*> %strided.v0
+}
+
+; CHECK-LABEL: load_ptrvec_factor3:
+; CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
+define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
+  %base = bitcast i32** %ptr to <6 x i32*>*
+  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
+  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
+  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
+  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
+  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
+  ret void
+}
+
+; CHECK-LABEL: load_ptrvec_factor4:
+; CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
+  %base = bitcast i32** %ptr to <8 x i32*>*
+  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
+  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
+  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
+  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
+  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
+  ret void
+}
+
+; CHECK-LABEL: store_ptrvec_factor2:
+; CHECK: st2 { v0.2d, v1.2d }, [x0]
+define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
+  %base = bitcast i32** %ptr to <4 x i32*>*
+  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_ptrvec_factor3:
+; CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0]
+define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
+  %base = bitcast i32** %ptr to <6 x i32*>*
+  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_ptrvec_factor4:
+; CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
+  %base = bitcast i32* %ptr to <8 x i32*>*
+  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
+  ret void
+}
+
+; Following cases check that shuffle maskes with undef indices can be matched
+; into ldN/stN instruction.
+
+; CHECK-LABEL: load_undef_mask_factor2:
+; CHECK: ld2 { v0.4s, v1.4s }, [x0]
+define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
+  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
+  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
+  %add = add nsw <4 x i32> %strided.v0, %strided.v1
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: load_undef_mask_factor3:
+; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
+define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
+  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %add = add nsw <4 x i32> %strided.v2, %strided.v1
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: load_undef_mask_factor4:
+; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
+  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
+  %add = add nsw <4 x i32> %strided.v0, %strided.v2
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: store_undef_mask_factor2:
+; CHECK: st2 { v0.4s, v1.4s }, [x0]
+define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_undef_mask_factor3:
+; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0]
+define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_undef_mask_factor4:
+; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/commute-shifts.ll b/test/CodeGen/AMDGPU/commute-shifts.ll
new file mode 100644
index 0000000..f88cf64
--- /dev/null
+++ b/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; GCN-LABEL: {{^}}main:
+; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
+
+define void @main() #0 {
+main_body:
+  %0 = fptosi float undef to i32
+  %1 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> undef, <32 x i8> undef, i32 2)
+  %2 = extractelement <4 x i32> %1, i32 0
+  %3 = and i32 %0, 7
+  %4 = shl i32 1, %3
+  %5 = and i32 %2, %4
+  %6 = icmp eq i32 %5, 0
+  %.10 = select i1 %6, float 0.000000e+00, float undef
+  %7 = call i32 @llvm.SI.packf16(float undef, float %.10)
+  %8 = bitcast i32 %7 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %8, float undef, float %8)
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/elf.ll b/test/CodeGen/AMDGPU/elf.ll
index d0fd06a..90af678 100644
--- a/test/CodeGen/AMDGPU/elf.ll
+++ b/test/CodeGen/AMDGPU/elf.ll
@@ -1,14 +1,16 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
-; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
 ; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
 
 ; Test that we don't try to produce a COFF file on windows
-; RUN: llc < %s -mtriple=amdgcn-pc-mingw -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -mtriple=amdgcn-pc-mingw -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
 
-; ELF: Format: ELF32
+; ELF: Format: ELF64
+; ELF: OS/ABI: AMDGPU_HSA (0x40)
+; ELF: Machine: EM_AMDGPU (0xE0)
 ; ELF: Name: .AMDGPU.config
 ; ELF: Type: SHT_PROGBITS
 
diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll
index f911339..653a6bb 100644
--- a/test/CodeGen/AMDGPU/hsa.ll
+++ b/test/CodeGen/AMDGPU/hsa.ll
@@ -1,10 +1,31 @@
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj  | llvm-readobj -s -sd | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF
+
+; The SHT_NOTE section contains the output from the .hsa_code_object_*
+; directives.
+
+; ELF: SHT_NOTE
+; ELF: 0000: 04000000 08000000 01000000 414D4400
+; ELF: 0010: 01000000 00000000 04000000 1B000000
+; ELF: 0020: 03000000 414D4400 04000700 07000000
+; ELF: 0030: 00000000 00000000 414D4400 414D4447
+; ELF: 0040: 50550000
+
+; HSA: .hsa_code_object_version 1,0
+; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 
-; HSA: .section        .hsa.version
-; HSA-NEXT: .ascii  "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
 ; HSA: {{^}}simple:
+; HSA: .amd_kernel_code_t
+; HSA: .end_amd_kernel_code_t
+; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0
+
 ; Make sure we are setting the ATC bit:
-; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
+; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
+; On VI+ we also need to set MTYPE = 2
+; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
 
 define void @simple(i32 addrspace(1)* %out) {
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
index bcb7f87..f948c98 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=SI %s
+; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=VI %s
 
 ; FIXME: Enable for VI.
 
diff --git a/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
new file mode 100644
index 0000000..ac9bedb
--- /dev/null
+++ b/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=amdgcn -mcpu=SI -o - %s | FileCheck %s
+; Don't crash when the use of an undefined value is only detected by the
+; register coalescer because it is hidden with subregister insert/extract.
+target triple="amdgcn--"
+
+; CHECK-LABEL: foobar:
+; CHECK: s_load_dword s2, s[0:1], 0x9
+; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
+; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; BB0_1:
+; CHECK: s_load_dword s6, s[0:1], 0xa
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v0, s6
+; BB0_2:
+; CHECK: s_or_b64 exec, exec, s[2:3]
+; CHECK-NEXT: s_mov_b32 s7, 0xf000
+; CHECK-NEXT: s_mov_b32 s6, -1
+; CHECK-NEXT: buffer_store_dword v1, s[4:7], 0
+; CHECK-NEXT: s_endpgm
+define void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
+entry:
+  %v0 = insertelement <4 x float> undef, float %a0, i32 0
+  br i1 undef, label %ift, label %ife
+
+ift:
+  %v1 = insertelement <4 x float> undef, float %a1, i32 0
+  br label %ife
+
+ife:
+  %val = phi <4 x float> [ %v1, %ift ], [ %v0, %entry ]
+  %v2 = extractelement <4 x float> %val, i32 1
+  store float %v2, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/arm-interleaved-accesses.ll b/test/CodeGen/ARM/arm-interleaved-accesses.ll
new file mode 100644
index 0000000..9a9885c
--- /dev/null
+++ b/test/CodeGen/ARM/arm-interleaved-accesses.ll
@@ -0,0 +1,204 @@
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s
+
+; CHECK-LABEL: load_factor2:
+; CHECK: vld2.8 {d16, d17}, [r0]
+define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
+  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %add = add nsw <8 x i8> %strided.v0, %strided.v1
+  ret <8 x i8> %add
+}
+
+; CHECK-LABEL: load_factor3:
+; CHECK: vld3.32 {d16, d17, d18}, [r0]
+define <2 x i32> @load_factor3(i32* %ptr) {
+  %base = bitcast i32* %ptr to <6 x i32>*
+  %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4
+  %strided.v2 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
+  %strided.v1 = shufflevector <6 x i32> %wide.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
+  %add = add nsw <2 x i32> %strided.v2, %strided.v1
+  ret <2 x i32> %add
+}
+
+; CHECK-LABEL: load_factor4:
+; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]!
+; CHECK: vld4.32 {d17, d19, d21, d23}, [r0]
+define <4 x i32> @load_factor4(i32* %ptr) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
+  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %add = add nsw <4 x i32> %strided.v0, %strided.v2
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: store_factor2:
+; CHECK: vst2.8 {d16, d17}, [r0]
+define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
+  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_factor3:
+; CHECK: vst3.32 {d16, d18, d20}, [r0]!
+; CHECK: vst3.32 {d17, d19, d21}, [r0]
+define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_factor4:
+; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]!
+; CHECK: vst4.32 {d17, d19, d21, d23}, [r0]
+define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
+  ret void
+}
+
+; The following cases test that interleaved access of pointer vectors can be
+; matched to ldN/stN instruction.
+
+; CHECK-LABEL: load_ptrvec_factor2:
+; CHECK: vld2.32 {d16, d17}, [r0]
+define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
+  %base = bitcast i32** %ptr to <4 x i32*>*
+  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
+  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x i32*> %strided.v0
+}
+
+; CHECK-LABEL: load_ptrvec_factor3:
+; CHECK: vld3.32 {d16, d17, d18}, [r0]
+define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
+  %base = bitcast i32** %ptr to <6 x i32*>*
+  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
+  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
+  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
+  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
+  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
+  ret void
+}
+
+; CHECK-LABEL: load_ptrvec_factor4:
+; CHECK: vld4.32 {d16, d17, d18, d19}, [r0]
+define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
+  %base = bitcast i32** %ptr to <8 x i32*>*
+  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
+  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
+  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
+  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
+  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
+  ret void
+}
+
+; CHECK-LABEL: store_ptrvec_factor2:
+; CHECK: vst2.32 {d16, d17}, [r0]
+define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
+  %base = bitcast i32** %ptr to <4 x i32*>*
+  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_ptrvec_factor3:
+; CHECK: vst3.32 {d16, d17, d18}, [r0]
+define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
+  %base = bitcast i32** %ptr to <6 x i32*>*
+  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_ptrvec_factor4:
+; CHECK: vst4.32 {d16, d17, d18, d19}, [r0]
+define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
+  %base = bitcast i32* %ptr to <8 x i32*>*
+  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
+  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
+  ret void
+}
+
+; Following cases check that shuffle maskes with undef indices can be matched
+; into ldN/stN instruction.
+
+; CHECK-LABEL: load_undef_mask_factor2:
+; CHECK: vld2.32 {d16, d17, d18, d19}, [r0]
+define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
+  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
+  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
+  %add = add nsw <4 x i32> %strided.v0, %strided.v1
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: load_undef_mask_factor3:
+; CHECK: vld3.32 {d16, d18, d20}, [r0]!
+; CHECK: vld3.32 {d17, d19, d21}, [r0]
+define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
+  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %add = add nsw <4 x i32> %strided.v2, %strided.v1
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: load_undef_mask_factor4:
+; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]!
+; CHECK: vld4.32 {d17, d19, d21, d23}, [r0]
+define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
+  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
+  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
+  %add = add nsw <4 x i32> %strided.v0, %strided.v2
+  ret <4 x i32> %add
+}
+
+; CHECK-LABEL: store_undef_mask_factor2:
+; CHECK: vst2.32 {d16, d17, d18, d19}, [r0]
+define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+  %base = bitcast i32* %ptr to <8 x i32>*
+  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
+  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_undef_mask_factor3:
+; CHECK: vst3.32 {d16, d18, d20}, [r0]!
+; CHECK: vst3.32 {d17, d19, d21}, [r0]
+define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+  %base = bitcast i32* %ptr to <12 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
+  ret void
+}
+
+; CHECK-LABEL: store_undef_mask_factor4:
+; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]!
+; CHECK: vst4.32 {d17, d19, d21, d23}, [r0]
+define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+  %base = bitcast i32* %ptr to <16 x i32>*
+  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
+  ret void
+}
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 0cc4f23..29c7023 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -51,6 +51,13 @@
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A17-NOFPU
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST
+
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-D16-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=+neon,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-NEON-FP16
+
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
@@ -1049,7 +1056,7 @@
 ; CORTEX-R4F:  .eabi_attribute 23, 3
 ; CORTEX-R4F:  .eabi_attribute 24, 1
 ; CORTEX-R4F:  .eabi_attribute 25, 1
-; CORTEX-R4F:  .eabi_attribute 27, 1
+; CORTEX-R4F-NOT:  .eabi_attribute 27, 1
 ; CORTEX-R4F-NOT:  .eabi_attribute 28
 ; CORTEX-R4F-NOT:  .eabi_attribute 36
 ; CORTEX-R4F:  .eabi_attribute 38, 1
@@ -1071,7 +1078,7 @@
 ; CORTEX-R5:  .eabi_attribute 23, 3
 ; CORTEX-R5:  .eabi_attribute 24, 1
 ; CORTEX-R5:  .eabi_attribute 25, 1
-; CORTEX-R5:  .eabi_attribute 27, 1
+; CORTEX-R5-NOT:  .eabi_attribute 27, 1
 ; CORTEX-R5-NOT:  .eabi_attribute 28
 ; CORTEX-R5-NOT:  .eabi_attribute 36
 ; CORTEX-R5:  .eabi_attribute 38, 1
@@ -1091,7 +1098,7 @@
 ; CORTEX-R7:  .eabi_attribute 7, 82
 ; CORTEX-R7:  .eabi_attribute 8, 1
 ; CORTEX-R7:  .eabi_attribute 9, 2
-; CORTEX-R7:  .fpu vfpv3-d16
+; CORTEX-R7:  .fpu vfpv3xd
 ; CORTEX-R7-NOT:   .eabi_attribute 19
 ;; We default to IEEE 754 compliance
 ; CORTEX-R7:  .eabi_attribute 20, 1
@@ -1205,6 +1212,12 @@
 ; CORTEX-A72-FAST-NOT:  .eabi_attribute 22
 ; CORTEX-A72-FAST:  .eabi_attribute 23, 1
 
+; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16
+; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16
+; GENERIC-FPU-VFPV3XD: .fpu vfpv3xd
+; GENERIC-FPU-VFPV3XD-FP16: .fpu vfpv3xd-fp16
+; GENERIC-FPU-NEON-FP16: .fpu neon-fp16
+
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 6, 14
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 7, 65
 ; GENERIC-ARMV8_1-A:  .eabi_attribute 8, 1
diff --git a/test/CodeGen/ARM/fnattr-trap.ll b/test/CodeGen/ARM/fnattr-trap.ll
new file mode 100644
index 0000000..492e31b
--- /dev/null
+++ b/test/CodeGen/ARM/fnattr-trap.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=arm-unknown-unknown | FileCheck %s -check-prefix=NOOPTION
+; RUN: llc < %s -mtriple=arm-unknown-unknown -trap-func=trap_llc | FileCheck %s -check-prefix=TRAP
+
+; NOOPTION-LABEL: {{\_?}}foo0:
+; NOOPTION: trap{{$}}
+
+; TRAP-LABEL: {{\_?}}foo0:
+; TRAP: bl {{\_?}}trap_llc
+
+define void @foo0() {
+  call void @llvm.trap()
+  unreachable
+}
+
+; NOOPTION-LABEL: {{\_?}}foo1:
+; NOOPTION: bl {{\_?}}trap_func_attr0
+
+; TRAP-LABEL: {{\_?}}foo1:
+; TRAP: bl {{\_?}}trap_llc
+
+define void @foo1() {
+  call void @llvm.trap() #0
+  unreachable
+}
+
+; NOOPTION-LABEL: {{\_?}}foo2:
+; NOOPTION: bl {{\_?}}trap_func_attr1
+
+; TRAP-LABEL: {{\_?}}foo2:
+; TRAP: bl {{\_?}}trap_llc
+
+define void @foo2() {
+  call void @llvm.trap() #1
+  unreachable
+}
+
+declare void @llvm.trap() nounwind
+
+attributes #0 = { "trap-func-name"="trap_func_attr0" }
+attributes #1 = { "trap-func-name"="trap_func_attr1" }
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index a8070ea..f3e1367 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -6,23 +6,24 @@
 
 ; Magic ARM pair hints works best with linearscan / fast.
 
-; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
-; register when interrupted or faulted.
-
 @b = external global i64*
 
-define i64 @t(i64 %a) nounwind readonly {
-entry:
-; A8-LABEL: t:
-; A8:   ldrd r2, r3, [r2]
-
-; M3-LABEL: t:
-; M3-NOT: ldrd
+; We use the following two to force values into specific registers.
+declare i64* @get_ptr()
+declare void @use_i64(i64 %v)
 
-	%0 = load i64*, i64** @b, align 4
-	%1 = load i64, i64* %0, align 4
-	%2 = mul i64 %1, %a
-	ret i64 %2
+define void @test_ldrd(i64 %a) nounwind readonly {
+; CHECK-LABEL: test_ldrd:
+; CHECK: bl{{x?}} _get_ptr
+; A8: ldrd r0, r1, [r0]
+; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
+; register when interrupted or faulted.
+; M3-NOT: ldrd r[[REGNUM:[0-9]+]], {{r[0-9]+}}, [r[[REGNUM]]]
+; CHECK: bl{{x?}} _use_i64
+  %ptr = call i64* @get_ptr()
+  %v = load i64, i64* %ptr, align 8
+  call void @use_i64(i64 %v)
+  ret void
 }
 
 ; rdar://10435045 mixed LDRi8/LDRi12
diff --git a/test/CodeGen/ARM/load-store-flags.ll b/test/CodeGen/ARM/load-store-flags.ll
new file mode 100644
index 0000000..5825a30
--- /dev/null
+++ b/test/CodeGen/ARM/load-store-flags.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=thumbv7-apple-ios7.0 -o - %s -verify-machineinstrs | FileCheck %s
+
+; The base register for the store is killed by the last instruction, but is
+; actually also used during as part of the store itself. If an extra ADD is
+; inserted, it should not kill the base.
+define void @test_base_kill(i32 %v0, i32 %v1, i32* %addr) {
+; CHECK-LABEL: test_base_kill:
+; CHECK: adds [[NEWBASE:r[0-9]+]], r2, #4
+; CHECK: stm.w [[NEWBASE]], {r0, r1, r2}
+
+  %addr.1 = getelementptr i32, i32* %addr, i32 1
+  store i32 %v0, i32* %addr.1
+
+  %addr.2 = getelementptr i32, i32* %addr, i32 2
+  store i32 %v1, i32* %addr.2
+
+  %addr.3 = getelementptr i32, i32* %addr, i32 3
+  %val = ptrtoint i32* %addr to i32
+  store i32 %val, i32* %addr.3
+
+  ret void
+}
+
+; Similar, but it's not sufficient to look at just the last instruction (where
+; liveness of the base is determined). An intervening instruction might be moved
+; past it to form the STM.
+define void @test_base_kill_mid(i32 %v0, i32* %addr, i32 %v1) {
+; CHECK-LABEL: test_base_kill_mid:
+; CHECK: adds [[NEWBASE:r[0-9]+]], r1, #4
+; CHECK: stm.w [[NEWBASE]], {r0, r1, r2}
+
+  %addr.1 = getelementptr i32, i32* %addr, i32 1
+  store i32 %v0, i32* %addr.1
+
+  %addr.2 = getelementptr i32, i32* %addr, i32 2
+  %val = ptrtoint i32* %addr to i32
+  store i32 %val, i32* %addr.2
+
+  %addr.3 = getelementptr i32, i32* %addr, i32 3
+  store i32 %v1, i32* %addr.3
+
+  ret void
+}
diff --git a/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll b/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
index 4b274d2..96c5fb8 100644
--- a/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
+++ b/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a9 -O1 -filetype=obj %s -o - | llvm-objdump -arch thumb -mcpu=cortex-a9 -d - | FileCheck %s
+; RUN: llc -mtriple=thumbv7-- -mcpu=cortex-a9 -O1 -filetype=obj %s -o - | llvm-objdump -triple=thumbv7-- -mcpu=cortex-a9 -d - | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv7--linux-gnueabi"
diff --git a/test/CodeGen/Generic/vector-casts.ll b/test/CodeGen/Generic/vector-casts.ll
index fee72b6..0afbb8c 100644
--- a/test/CodeGen/Generic/vector-casts.ll
+++ b/test/CodeGen/Generic/vector-casts.ll
@@ -2,44 +2,44 @@
 ; PR2671
 
 define void @a(<2 x double>* %p, <2 x i8>* %q) {
-        %t = load <2 x double>, <2 x double>* %p
-	%r = fptosi <2 x double> %t to <2 x i8>
-        store <2 x i8> %r, <2 x i8>* %q
-	ret void
+  %t = load <2 x double>, <2 x double>* %p
+  %r = fptosi <2 x double> %t to <2 x i8>
+  store <2 x i8> %r, <2 x i8>* %q
+  ret void
 }
 define void @b(<2 x double>* %p, <2 x i8>* %q) {
-        %t = load <2 x double>, <2 x double>* %p
-	%r = fptoui <2 x double> %t to <2 x i8>
-        store <2 x i8> %r, <2 x i8>* %q
-	ret void
+  %t = load <2 x double>, <2 x double>* %p
+  %r = fptoui <2 x double> %t to <2 x i8>
+  store <2 x i8> %r, <2 x i8>* %q
+  ret void
 }
 define void @c(<2 x i8>* %p, <2 x double>* %q) {
-        %t = load <2 x i8>, <2 x i8>* %p
-	%r = sitofp <2 x i8> %t to <2 x double>
-        store <2 x double> %r, <2 x double>* %q
-	ret void
+  %t = load <2 x i8>, <2 x i8>* %p
+  %r = sitofp <2 x i8> %t to <2 x double>
+  store <2 x double> %r, <2 x double>* %q
+  ret void
 }
 define void @d(<2 x i8>* %p, <2 x double>* %q) {
-        %t = load <2 x i8>, <2 x i8>* %p
-	%r = uitofp <2 x i8> %t to <2 x double>
-        store <2 x double> %r, <2 x double>* %q
-	ret void
+  %t = load <2 x i8>, <2 x i8>* %p
+  %r = uitofp <2 x i8> %t to <2 x double>
+  store <2 x double> %r, <2 x double>* %q
+  ret void
 }
 define void @e(<2 x i8>* %p, <2 x i16>* %q) {
-        %t = load <2 x i8>, <2 x i8>* %p
-	%r = sext <2 x i8> %t to <2 x i16>
-        store <2 x i16> %r, <2 x i16>* %q
-	ret void
+  %t = load <2 x i8>, <2 x i8>* %p
+  %r = sext <2 x i8> %t to <2 x i16>
+  store <2 x i16> %r, <2 x i16>* %q
+  ret void
 }
 define void @f(<2 x i8>* %p, <2 x i16>* %q) {
-        %t = load <2 x i8>, <2 x i8>* %p
-	%r = zext <2 x i8> %t to <2 x i16>
-        store <2 x i16> %r, <2 x i16>* %q
-	ret void
+  %t = load <2 x i8>, <2 x i8>* %p
+  %r = zext <2 x i8> %t to <2 x i16>
+  store <2 x i16> %r, <2 x i16>* %q
+  ret void
 }
 define void @g(<2 x i16>* %p, <2 x i8>* %q) {
-        %t = load <2 x i16>, <2 x i16>* %p
-	%r = trunc <2 x i16> %t to <2 x i8>
-        store <2 x i8> %r, <2 x i8>* %q
-	ret void
+  %t = load <2 x i16>, <2 x i16>* %p
+  %r = trunc <2 x i16> %t to <2 x i8>
+  store <2 x i8> %r, <2 x i8>* %q
+  ret void
 }
diff --git a/test/CodeGen/MIR/X86/expected-machine-operand.mir b/test/CodeGen/MIR/X86/expected-machine-operand.mir
new file mode 100644
index 0000000..3725c93
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-machine-operand.mir
@@ -0,0 +1,21 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK: [[@LINE+1]]:24: expected a machine operand
+     - '%eax = XOR32rr ='
+     - 'RETQ %eax'
+...
+
diff --git a/test/CodeGen/MIR/X86/expected-number-after-bb.mir b/test/CodeGen/MIR/X86/expected-number-after-bb.mir
new file mode 100644
index 0000000..f4248a7
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-number-after-bb.mir
@@ -0,0 +1,37 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    %b = icmp sle i32 %a, 10
+    br i1 %b, label %yes, label %nah
+
+  yes:
+    ret i32 0
+
+  nah:
+    ret i32 %a
+  }
+
+...
+---
+name:            foo
+body:
+ - id: 0
+   name:   entry
+   instructions:
+     - '%eax = MOV32rm %rdi, 1, _, 0, _'
+     - 'CMP32ri8 %eax, 10'
+     # CHECK: [[@LINE+1]]:18: expected a number after '%bb.'
+     - 'JG_1 %bb.nah'
+ - id: 1
+   name: yes
+   instructions:
+     - '%eax = MOV32r0'
+ - id: 2
+   name: nah
+   instructions:
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/global-value-operands.mir b/test/CodeGen/MIR/X86/global-value-operands.mir
new file mode 100644
index 0000000..4aa88fe
--- /dev/null
+++ b/test/CodeGen/MIR/X86/global-value-operands.mir
@@ -0,0 +1,49 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses global value operands correctly.
+
+--- |
+
+  @G = external global i32
+  @0 = external global i32
+
+  define i32 @inc() {
+  entry:
+    %a = load i32, i32* @G
+    %b = add i32 %a, 1
+    ret i32 %b
+  }
+
+  define i32 @inc2() {
+  entry:
+    %a = load i32, i32* @0
+    %b = add i32 %a, 1
+    ret i32 %b
+  }
+
+...
+---
+# CHECK: name: inc
+name: inc
+body:
+  - id: 0
+    name: entry
+    instructions:
+      # CHECK: - '%rax = MOV64rm %rip, 1, _, @G, _'
+      - '%rax = MOV64rm %rip, 1, _, @G, _'
+      - '%eax = MOV32rm %rax, 1, _, 0, _'
+      - '%eax = INC32r %eax'
+      - 'RETQ %eax'
+...
+---
+# CHECK: name: inc2
+name: inc2
+body:
+  - id: 0
+    name: entry
+    instructions:
+      # CHECK: - '%rax = MOV64rm %rip, 1, _, @0, _'
+      - '%rax = MOV64rm %rip, 1, _, @0, _'
+      - '%eax = MOV32rm %rax, 1, _, 0, _'
+      - '%eax = INC32r %eax'
+      - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/immediate-operands.mir b/test/CodeGen/MIR/X86/immediate-operands.mir
new file mode 100644
index 0000000..5d4956f
--- /dev/null
+++ b/test/CodeGen/MIR/X86/immediate-operands.mir
@@ -0,0 +1,40 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses immediate machine operands.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 42
+  }
+
+  define i32 @bar() {
+  entry:
+    ret i32 -11
+  }
+
+...
+---
+# CHECK: name: foo
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK:      - '%eax = MOV32ri 42'
+     # CHECK-NEXT: - 'RETQ %eax'
+     - '%eax = MOV32ri 42'
+     - 'RETQ %eax'
+...
+---
+# CHECK: name: bar
+name:            bar
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK:      - '%eax = MOV32ri -11'
+     # CHECK-NEXT: - 'RETQ %eax'
+     - '%eax = MOV32ri -11'
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/large-index-number-error.mir b/test/CodeGen/MIR/X86/large-index-number-error.mir
new file mode 100644
index 0000000..61a5bdf
--- /dev/null
+++ b/test/CodeGen/MIR/X86/large-index-number-error.mir
@@ -0,0 +1,35 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    %b = icmp sle i32 %a, 10
+    br i1 %b, label %0, label %1
+
+  ; <label>:0
+    ret i32 0
+
+  ; <label>:1
+    ret i32 %a
+  }
+
+...
+---
+name:            foo
+body:
+ - id: 0
+   name: entry
+   instructions:
+     - '%eax = MOV32rm %rdi, 1, _, 0, _'
+     - 'CMP32ri8 %eax, 10'
+     # CHECK: [[@LINE+1]]:14: expected 32-bit integer (too large)
+     - 'JG_1 %bb.123456789123456'
+ - id: 1
+   instructions:
+     - '%eax = MOV32r0'
+ - id: 2
+   instructions:
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/lit.local.cfg b/test/CodeGen/MIR/X86/lit.local.cfg
new file mode 100644
index 0000000..c8625f4
--- /dev/null
+++ b/test/CodeGen/MIR/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
diff --git a/test/CodeGen/MIR/X86/machine-basic-block-operands.mir b/test/CodeGen/MIR/X86/machine-basic-block-operands.mir
new file mode 100644
index 0000000..9d1bd0b
--- /dev/null
+++ b/test/CodeGen/MIR/X86/machine-basic-block-operands.mir
@@ -0,0 +1,75 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine basic block operands.
+
+--- |
+
+  define i32 @foo(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    %0 = icmp sle i32 %a, 10
+    br i1 %0, label %less, label %exit
+
+  less:
+    ret i32 0
+
+  exit:
+    ret i32 %a
+  }
+
+  define i32 @bar(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    %b = icmp sle i32 %a, 10
+    br i1 %b, label %0, label %1
+
+  ; <label>:0
+    ret i32 0
+
+  ; <label>:1
+    ret i32 %a
+  }
+
+...
+---
+# CHECK: name: foo
+name:            foo
+body:
+ # CHECK: name: entry
+ - id:              0
+   name:            entry
+   instructions:
+     - '%eax = MOV32rm %rdi, 1, _, 0, _'
+     # CHECK:      - 'CMP32ri8 %eax, 10
+     # CHECK-NEXT: - 'JG_1 %bb.2.exit
+     - 'CMP32ri8 %eax, 10'
+     - 'JG_1 %bb.2.exit'
+ # CHECK: name: less
+ - id:              1
+   name:            less
+   instructions:
+     - '%eax = MOV32r0'
+ - id:              2
+   name:            exit
+   instructions:
+     - 'RETQ %eax'
+...
+---
+# CHECK: name: bar
+name:            bar
+body:
+ # CHECK: name: entry
+ - id: 0
+   name: entry
+   instructions:
+     - '%eax = MOV32rm %rdi, 1, _, 0, _'
+     # CHECK:      - 'CMP32ri8 %eax, 10
+     # CHECK-NEXT: - 'JG_1 %bb.2
+     - 'CMP32ri8 %eax, 10'
+     - 'JG_1 %bb.3'
+ - id: 1
+   instructions:
+     - '%eax = MOV32r0'
+ - id: 3
+   instructions:
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/machine-instructions.mir b/test/CodeGen/MIR/X86/machine-instructions.mir
new file mode 100644
index 0000000..b743198
--- /dev/null
+++ b/test/CodeGen/MIR/X86/machine-instructions.mir
@@ -0,0 +1,25 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses X86 machine instructions
+# correctly.
+
+--- |
+
+  define i32 @inc(i32 %a) {
+  entry:
+    %b = mul i32 %a, 11
+    ret i32 %b
+  }
+
+...
+---
+# CHECK: name: inc
+name:            inc
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK:      - IMUL32rri8
+     # CHECK-NEXT: - RETQ
+     - IMUL32rri8
+     - ' RETQ '
+...
diff --git a/test/CodeGen/MIR/X86/missing-comma.mir b/test/CodeGen/MIR/X86/missing-comma.mir
new file mode 100644
index 0000000..54c67ac
--- /dev/null
+++ b/test/CodeGen/MIR/X86/missing-comma.mir
@@ -0,0 +1,21 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK: [[@LINE+1]]:29: expected ',' before the next machine operand
+     - '%eax = XOR32rr %eax %eflags'
+     - 'RETQ %eax'
+...
+
diff --git a/test/CodeGen/MIR/X86/missing-instruction.mir b/test/CodeGen/MIR/X86/missing-instruction.mir
new file mode 100644
index 0000000..8d11ab5
--- /dev/null
+++ b/test/CodeGen/MIR/X86/missing-instruction.mir
@@ -0,0 +1,19 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define void @foo() {
+  entry:
+    ret void
+  }
+
+...
+---
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK: [[@LINE+1]]:9: expected a machine instruction
+     - ''
+...
diff --git a/test/CodeGen/MIR/X86/named-registers.mir b/test/CodeGen/MIR/X86/named-registers.mir
new file mode 100644
index 0000000..5defb84
--- /dev/null
+++ b/test/CodeGen/MIR/X86/named-registers.mir
@@ -0,0 +1,23 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses X86 registers correctly.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+# CHECK: name: foo
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK:      - '%eax = MOV32r0
+     # CHECK-NEXT: - 'RETQ %eax
+     - '%eax = MOV32r0'
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/null-register-operands.mir b/test/CodeGen/MIR/X86/null-register-operands.mir
new file mode 100644
index 0000000..55c0ceb
--- /dev/null
+++ b/test/CodeGen/MIR/X86/null-register-operands.mir
@@ -0,0 +1,24 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses null register operands correctly.
+
+--- |
+
+  define i32 @deref(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    ret i32 %a
+  }
+
+...
+---
+# CHECK: name: deref
+name:            deref
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK:      - '%eax = MOV32rm %rdi, 1, _, 0, _'
+     # CHECK-NEXT: - 'RETQ %eax'
+     - '%eax = MOV32rm %rdi, 1, _, 0, %noreg'
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/register-mask-operands.mir b/test/CodeGen/MIR/X86/register-mask-operands.mir
new file mode 100644
index 0000000..ecaedea
--- /dev/null
+++ b/test/CodeGen/MIR/X86/register-mask-operands.mir
@@ -0,0 +1,43 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses register mask operands correctly.
+
+--- |
+
+  define i32 @compute(i32 %a) #0 {
+  body:
+    %c = mul i32 %a, 11
+    ret i32 %c
+  }
+
+  define i32 @foo(i32 %a) #0 {
+  entry:
+    %b = call i32 @compute(i32 %a)
+    ret i32 %b
+  }
+
+  attributes #0 = { "no-frame-pointer-elim"="false" }
+
+...
+---
+name:            compute
+body:
+  - id:          0
+    name:        body
+    instructions:
+      - '%eax = IMUL32rri8 %edi, 11'
+      - 'RETQ %eax'
+...
+---
+# CHECK: name: foo
+name:            foo
+body:
+  - id:          0
+    name:        entry
+    instructions:
+      # CHECK:      - 'PUSH64r %rax
+      # CHECK-NEXT: - 'CALL64pcrel32 @compute, csr_64, %rsp, %edi, %rsp, %eax'
+      - 'PUSH64r %rax'
+      - 'CALL64pcrel32 @compute, csr_64, %rsp, %edi, %rsp, %eax'
+      - '%rdx = POP64r'
+      - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/undefined-global-value.mir b/test/CodeGen/MIR/X86/undefined-global-value.mir
new file mode 100644
index 0000000..e41dc04
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-global-value.mir
@@ -0,0 +1,28 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an invalid global value index
+# is used.
+
+--- |
+
+  @0 = external global i32
+
+  define i32 @inc() {
+  entry:
+    %a = load i32, i32* @0
+    %b = add i32 %a, 1
+    ret i32 %b
+  }
+
+...
+---
+name: inc
+body:
+  - id: 0
+    name: entry
+    instructions:
+      # CHECK: [[@LINE+1]]:37: use of undefined global value '@2'
+      - '%rax = MOV64rm %rip, 1, _, @2, _'
+      - '%eax = MOV32rm %rax, 1, _, 0, _'
+      - '%eax = INC32r %eax'
+      - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/undefined-named-global-value.mir b/test/CodeGen/MIR/X86/undefined-named-global-value.mir
new file mode 100644
index 0000000..b40c2ce
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-named-global-value.mir
@@ -0,0 +1,28 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an undefined global value is
+# used.
+
+--- |
+
+  @G = external global i32
+
+  define i32 @inc() {
+  entry:
+    %a = load i32, i32* @G
+    %b = add i32 %a, 1
+    ret i32 %b
+  }
+
+...
+---
+name: inc
+body:
+  - id: 0
+    name: entry
+    instructions:
+      # CHECK: [[@LINE+1]]:37: use of undefined global value '@GG'
+      - '%rax = MOV64rm %rip, 1, _, @GG, _'
+      - '%eax = MOV32rm %rax, 1, _, 0, _'
+      - '%eax = INC32r %eax'
+      - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/unknown-instruction.mir b/test/CodeGen/MIR/X86/unknown-instruction.mir
new file mode 100644
index 0000000..4e58ca6
--- /dev/null
+++ b/test/CodeGen/MIR/X86/unknown-instruction.mir
@@ -0,0 +1,21 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an unknown instruction is
+# encountered.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK: [[@LINE+1]]:8: unknown machine instruction name 'retJust0'
+     - retJust0
+...
diff --git a/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir b/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir
new file mode 100644
index 0000000..5bc979a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir
@@ -0,0 +1,38 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an invalid machine basic
+# block index is used.
+
+
+--- |
+
+  define i32 @foo(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    %b = icmp sle i32 %a, 10
+    br i1 %b, label %0, label %1
+
+  ; <label>:0
+    ret i32 0
+
+  ; <label>:1
+    ret i32 %a
+  }
+
+...
+---
+name:            foo
+body:
+ - id: 0
+   name:         entry
+   instructions:
+     - '%eax = MOV32rm %rdi, 1, _, 0, _'
+     - 'CMP32ri8 %eax, 10'
+     # CHECK: [[@LINE+1]]:14: use of undefined machine basic block #4
+     - 'JG_1 %bb.4'
+ - id: 1
+   instructions:
+     - '%eax = MOV32r0'
+ - id: 2
+   instructions:
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir b/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir
new file mode 100644
index 0000000..cd8c540
--- /dev/null
+++ b/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir
@@ -0,0 +1,39 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an unknown named machine
+# basic block is encountered.
+
+--- |
+
+  define i32 @foo(i32* %p) {
+  entry:
+    %a = load i32, i32* %p
+    %0 = icmp sle i32 %a, 10
+    br i1 %0, label %less, label %exit
+
+  less:
+    ret i32 0
+
+  exit:
+    ret i32 %a
+  }
+
+...
+---
+name:            foo
+body:
+ - id:              0
+   name:            entry
+   instructions:
+     - '%eax = MOV32rm %rdi, 1, _, 0, _'
+     - 'CMP32ri8 %eax, 10'
+     # CHECK: [[@LINE+1]]:14: the name of machine basic block #2 isn't 'hit'
+     - 'JG_1 %bb.2.hit'
+ - id:              1
+   name:            less
+   instructions:
+     - '%eax = MOV32r0'
+ - id:              2
+   name:            exit
+   instructions:
+     - 'RETQ %eax'
+...
diff --git a/test/CodeGen/MIR/X86/unknown-register.mir b/test/CodeGen/MIR/X86/unknown-register.mir
new file mode 100644
index 0000000..ce40ee8
--- /dev/null
+++ b/test/CodeGen/MIR/X86/unknown-register.mir
@@ -0,0 +1,22 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an unknown register is
+# encountered.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK: [[@LINE+1]]:9: unknown register name 'xax'
+     - '%xax = MOV32r0'
+     - 'RETQ %xax'
+...
diff --git a/test/CodeGen/MIR/X86/unrecognized-character.mir b/test/CodeGen/MIR/X86/unrecognized-character.mir
new file mode 100644
index 0000000..3b4fb1a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/unrecognized-character.mir
@@ -0,0 +1,19 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define void @foo() {
+  entry:
+    ret void
+  }
+
+...
+---
+name:            foo
+body:
+ - id:           0
+   name:         entry
+   instructions:
+     # CHECK: [[@LINE+1]]:9: unexpected character '`'
+     - '` RETQ'
+...
diff --git a/test/CodeGen/MIR/basic-blocks.mir b/test/CodeGen/MIR/basic-blocks.mir
index 43d8750..1731304 100644
--- a/test/CodeGen/MIR/basic-blocks.mir
+++ b/test/CodeGen/MIR/basic-blocks.mir
@@ -17,27 +17,33 @@
 ---
 # CHECK: name: foo
 # CHECK: body:
-# CHECK-NEXT: - name: entry
+# CHECK-NEXT: - id: 0
+# CHECK-NEXT:   name: entry
 # CHECK-NEXT:   alignment: 0
 # CHECK-NEXT:   isLandingPad: false
 # CHECK-NEXT:   addressTaken: false
 name:            foo
 body:
- - name:         entry
+ - id:           0
+   name:         entry
 ...
 ---
 # CHECK: name: bar
 # CHECK: body:
-# CHECK-NEXT: - name: start
+# CHECK-NEXT: - id: 0
+# CHECK-NEXT:   name: start
 # CHECK-NEXT:   alignment: 4
 # CHECK-NEXT:   isLandingPad: false
 # CHECK-NEXT:   addressTaken: false
-# CHECK-NEXT: - alignment: 0
+# CHECK-NEXT: - id: 1
+# CHECK-NEXT:   alignment: 0
 # CHECK-NEXT:   isLandingPad: false
 # CHECK-NEXT:   addressTaken: true
 name:            bar
 body:
- - name:         start
+ - id:           0
+   name:         start
    alignment:    4
- - addressTaken: true
+ - id:           1
+   addressTaken: true
 ...
diff --git a/test/CodeGen/MIR/expected-eof-after-successor-mbb.mir b/test/CodeGen/MIR/expected-eof-after-successor-mbb.mir
new file mode 100644
index 0000000..25ae511
--- /dev/null
+++ b/test/CodeGen/MIR/expected-eof-after-successor-mbb.mir
@@ -0,0 +1,29 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo(i32 %a) {
+  entry:
+    %0 = icmp sle i32 %a, 10
+    br i1 %0, label %less, label %exit
+
+  less:
+    ret i32 0
+
+  exit:
+    ret i32 %a
+  }
+
+...
+---
+name:            foo
+body:
+  - id:          0
+    name:        entry
+    # CHECK: [[@LINE+1]]:46: expected end of string after the machine basic block reference
+    successors:  [ '%bb.1.less', '%bb.2.exit 2' ]
+  - id:          1
+    name:        less
+  - id:          2
+    name:        exit
+...
diff --git a/test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir b/test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir
new file mode 100644
index 0000000..ce91929
--- /dev/null
+++ b/test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir
@@ -0,0 +1,29 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo(i32 %a) {
+  entry:
+    %0 = icmp sle i32 %a, 10
+    br i1 %0, label %less, label %exit
+
+  less:
+    ret i32 0
+
+  exit:
+    ret i32 %a
+  }
+
+...
+---
+name:            foo
+body:
+  - id:          0
+    name:        entry
+    # CHECK: [[@LINE+1]]:35: expected a machine basic block reference
+    successors:  [ '%bb.1.less', '2' ]
+  - id:          1
+    name:        less
+  - id:          2
+    name:        exit
+...
diff --git a/test/CodeGen/MIR/machine-basic-block-redefinition-error.mir b/test/CodeGen/MIR/machine-basic-block-redefinition-error.mir
new file mode 100644
index 0000000..deac3b0
--- /dev/null
+++ b/test/CodeGen/MIR/machine-basic-block-redefinition-error.mir
@@ -0,0 +1,17 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+...
+---
+name:            foo
+body:
+  # CHECK: redefinition of machine basic block with id #0
+  - id:       0
+  - id:       0
+...
diff --git a/test/CodeGen/MIR/machine-basic-block-unknown-name.mir b/test/CodeGen/MIR/machine-basic-block-unknown-name.mir
index 4c363c6..ed675c5 100644
--- a/test/CodeGen/MIR/machine-basic-block-unknown-name.mir
+++ b/test/CodeGen/MIR/machine-basic-block-unknown-name.mir
@@ -14,5 +14,6 @@
 name:            foo
 body:
   # CHECK: basic block 'entrie' is not defined in the function 'foo'
-  - name:         entrie
+  - id:          0
+    name:        entrie
 ...
diff --git a/test/CodeGen/MIR/machine-function.mir b/test/CodeGen/MIR/machine-function.mir
index a3c1d1d..8f053ad 100644
--- a/test/CodeGen/MIR/machine-function.mir
+++ b/test/CodeGen/MIR/machine-function.mir
@@ -25,7 +25,7 @@
 # CHECK-NEXT: alignment:
 # CHECK-NEXT: exposesReturnsTwice: false
 # CHECK-NEXT: hasInlineAsm: false
-# CHECK-NEXT: ...
+# CHECK: ...
 name:            foo
 ...
 ---
@@ -33,7 +33,7 @@ name:            foo
 # CHECK-NEXT: alignment:
 # CHECK-NEXT: exposesReturnsTwice: false
 # CHECK-NEXT: hasInlineAsm: false
-# CHECK-NEXT: ...
+# CHECK: ...
 name:            bar
 ...
 ---
@@ -41,7 +41,7 @@ name:            bar
 # CHECK-NEXT: alignment: 8
 # CHECK-NEXT: exposesReturnsTwice: false
 # CHECK-NEXT: hasInlineAsm: false
-# CHECK-NEXT: ...
+# CHECK: ...
 name:            func
 alignment:       8
 ...
@@ -50,7 +50,7 @@ alignment:       8
 # CHECK-NEXT: alignment: 16
 # CHECK-NEXT: exposesReturnsTwice: true
 # CHECK-NEXT: hasInlineAsm: true
-# CHECK-NEXT: ...
+# CHECK: ...
 name:            func2
 alignment:       16
 exposesReturnsTwice: true
diff --git a/test/CodeGen/MIR/register-info.mir b/test/CodeGen/MIR/register-info.mir
new file mode 100644
index 0000000..c01997b
--- /dev/null
+++ b/test/CodeGen/MIR/register-info.mir
@@ -0,0 +1,36 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine register info properties
+# correctly.
+
+--- |
+
+  define i32 @foo() {
+  entry:
+    ret i32 0
+  }
+
+  define i32 @bar() {
+  start:
+    ret i32 0
+  }
+
+...
+---
+# CHECK: name: foo
+# CHECK:      isSSA: false
+# CHECK-NEXT: tracksRegLiveness: false
+# CHECK-NEXT: tracksSubRegLiveness: false
+# CHECK: ...
+name:            foo
+...
+---
+# CHECK: name: bar
+# CHECK:      isSSA: false
+# CHECK-NEXT: tracksRegLiveness: true
+# CHECK-NEXT: tracksSubRegLiveness: true
+# CHECK: ...
+name: bar
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+...
diff --git a/test/CodeGen/MIR/successor-basic-blocks.mir b/test/CodeGen/MIR/successor-basic-blocks.mir
new file mode 100644
index 0000000..3fe01e3
--- /dev/null
+++ b/test/CodeGen/MIR/successor-basic-blocks.mir
@@ -0,0 +1,58 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses basic block successors correctly.
+
+--- |
+
+  define i32 @foo(i32 %a) {
+  entry:
+    %0 = icmp sle i32 %a, 10
+    br i1 %0, label %less, label %exit
+
+  less:
+    ret i32 0
+
+  exit:
+    ret i32 %a
+  }
+
+  define i32 @bar(i32 %a) {
+  entry:
+    %b = icmp sle i32 %a, 10
+    br i1 %b, label %0, label %1
+
+  ; <label>:0
+    ret i32 0
+
+  ; <label>:1
+    ret i32 %a
+  }
+
+...
+---
+name:            foo
+body:
+  # CHECK: name: entry
+  # CHECK: successors: [ '%bb.1.less', '%bb.2.exit' ]
+  # CHECK: name: less
+  - id:          0
+    name:        entry
+    successors:  [ '%bb.1.less', '%bb.2.exit' ]
+  - id:          1
+    name:        less
+  - id:          2
+    name:        exit
+...
+---
+name:            bar
+body:
+  # CHECK: name: bar
+  # CHECK: name: entry
+  # CHECK: successors: [ '%bb.1', '%bb.2' ]
+  # CHECK: id: 1
+  # CHECK: id: 2
+  - id:          0
+    name:        entry
+    successors:  [ '%bb.1', '%bb.2' ]
+  - id:          1
+  - id:          2
+...
diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 8ff762a..7ca31bb 100644
--- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -20,8 +20,7 @@ entry:
   %buf = alloca [16 x i8], align 4
 
 ; CHECK: .local .align 4 .b8 	__local_depot0[16]
-; CHECK: mov.u64 %rd[[BUF_REG:[0-9]+]]
-; CHECK: cvta.local.u64 %SP, %rd[[BUF_REG]]
+; CHECK: mov.u64 %SPL
 
 ; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
 ; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
diff --git a/test/CodeGen/NVPTX/extloadv.ll b/test/CodeGen/NVPTX/extloadv.ll
new file mode 100644
index 0000000..8c264ae
--- /dev/null
+++ b/test/CodeGen/NVPTX/extloadv.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+define void @foo(float* nocapture readonly %x_value, double* nocapture %output) #0 {
+  %1 = bitcast float* %x_value to <4 x float>*
+  %2 = load <4 x float>, <4 x float>* %1, align 16
+  %3 = fpext <4 x float> %2 to <4 x double>
+; CHECK-NOT: ld.v2.f32 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}];
+; CHECK:  cvt.f64.f32
+; CHECK:  cvt.f64.f32
+; CHECK:  cvt.f64.f32
+; CHECK:  cvt.f64.f32
+  %4 = bitcast double* %output to <4 x double>*
+  store <4 x double> %3, <4 x double>* %4
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/globals_lowering.ll b/test/CodeGen/NVPTX/globals_lowering.ll
new file mode 100644
index 0000000..84c61ef
--- /dev/null
+++ b/test/CodeGen/NVPTX/globals_lowering.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -relocation-model=static | FileCheck %s --check-prefix CHK
+
+%MyStruct = type { i32, i32, float }
+@Gbl = internal addrspace(3) global [1024 x %MyStruct] zeroinitializer
+
+; CHK-LABEL: foo
+define void @foo(float %f) {
+entry:
+  ; CHK: ld.shared.f32  %{{[a-zA-Z0-9]+}}, [Gbl+8];
+  %0 = load float, float addrspace(3)* getelementptr inbounds ([1024 x %MyStruct], [1024 x %MyStruct] addrspace(3)* @Gbl, i32 0, i32 0, i32 2)
+  %add = fadd float %0, %f
+  ; CHK: st.shared.f32   [Gbl+8], %{{[a-zA-Z0-9]+}};
+  store float %add, float addrspace(3)* getelementptr inbounds ([1024 x %MyStruct], [1024 x %MyStruct] addrspace(3)* @Gbl, i32 0, i32 0, i32 2)
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
index 34b671d..06a8712 100644
--- a/test/CodeGen/NVPTX/intrinsics.ll
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -16,6 +16,8 @@ define ptx_device double @test_fabs(double %d) {
 }
 
 define float @test_nvvm_sqrt(float %a) {
+; CHECK: sqrt.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
   %val = call float @llvm.nvvm.sqrt.f(float %a)
   ret float %val
 }
diff --git a/test/CodeGen/NVPTX/local-stack-frame.ll b/test/CodeGen/NVPTX/local-stack-frame.ll
index 377eee9..ef1b7da 100644
--- a/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -3,12 +3,12 @@
 
 ; Ensure we access the local stack properly
 
-; PTX32:        mov.u32          %r{{[0-9]+}}, __local_depot{{[0-9]+}};
-; PTX32:        cvta.local.u32   %SP, %r{{[0-9]+}};
+; PTX32:        mov.u32          %SPL, __local_depot{{[0-9]+}};
+; PTX32:        cvta.local.u32   %SP, %SPL;
 ; PTX32:        ld.param.u32     %r{{[0-9]+}}, [foo_param_0];
 ; PTX32:        st.volatile.u32  [%SP+0], %r{{[0-9]+}};
-; PTX64:        mov.u64          %rd{{[0-9]+}}, __local_depot{{[0-9]+}};
-; PTX64:        cvta.local.u64   %SP, %rd{{[0-9]+}};
+; PTX64:        mov.u64          %SPL, __local_depot{{[0-9]+}};
+; PTX64:        cvta.local.u64   %SP, %SPL;
 ; PTX64:        ld.param.u32     %r{{[0-9]+}}, [foo_param_0];
 ; PTX64:        st.volatile.u32  [%SP+0], %r{{[0-9]+}};
 define void @foo(i32 %a) {
@@ -16,3 +16,67 @@ define void @foo(i32 %a) {
   store volatile i32 %a, i32* %local
   ret void
 }
+
+; PTX32:        mov.u32          %SPL, __local_depot{{[0-9]+}};
+; PTX32:        cvta.local.u32   %SP, %SPL;
+; PTX32:        ld.param.u32     %r{{[0-9]+}}, [foo2_param_0];
+; PTX32:        add.u32          %r[[SP_REG:[0-9]+]], %SPL, 0;
+; PTX32:        st.local.u32  [%r[[SP_REG]]], %r{{[0-9]+}};
+; PTX64:        mov.u64          %SPL, __local_depot{{[0-9]+}};
+; PTX64:        cvta.local.u64   %SP, %SPL;
+; PTX64:        ld.param.u32     %r{{[0-9]+}}, [foo2_param_0];
+; PTX64:        add.u64          %rd[[SP_REG:[0-9]+]], %SPL, 0;
+; PTX64:        st.local.u32  [%rd[[SP_REG]]], %r{{[0-9]+}};
+define void @foo2(i32 %a) {
+  %local = alloca i32, align 4
+  store i32 %a, i32* %local
+  call void @bar(i32* %local)
+  ret void
+}
+
+declare void @bar(i32* %a)
+
+!nvvm.annotations = !{!0}
+!0 = !{void (i32)* @foo2, !"kernel", i32 1}
+
+; PTX32:        mov.u32          %SPL, __local_depot{{[0-9]+}};
+; PTX32-NOT:    cvta.local.u32   %SP, %SPL;
+; PTX32:        ld.param.u32     %r{{[0-9]+}}, [foo3_param_0];
+; PTX32:        add.u32          %r{{[0-9]+}}, %SPL, 0;
+; PTX32:        st.local.u32  [%r{{[0-9]+}}], %r{{[0-9]+}};
+; PTX64:        mov.u64          %SPL, __local_depot{{[0-9]+}};
+; PTX64-NOT:    cvta.local.u64   %SP, %SPL;
+; PTX64:        ld.param.u32     %r{{[0-9]+}}, [foo3_param_0];
+; PTX64:        add.u64          %rd{{[0-9]+}}, %SPL, 0;
+; PTX64:        st.local.u32  [%rd{{[0-9]+}}], %r{{[0-9]+}};
+define void @foo3(i32 %a) {
+  %local = alloca [3 x i32], align 4
+  %1 = bitcast [3 x i32]* %local to i32*
+  %2 = getelementptr inbounds i32, i32* %1, i32 %a
+  store i32 %a, i32* %2
+  ret void
+}
+
+; PTX32:        cvta.local.u32   %SP, %SPL;
+; PTX32:        add.u32          {{%r[0-9]+}}, %SP, 0;
+; PTX32:        add.u32          {{%r[0-9]+}}, %SPL, 0;
+; PTX32:        add.u32          {{%r[0-9]+}}, %SP, 4;
+; PTX32:        add.u32          {{%r[0-9]+}}, %SPL, 4;
+; PTX32:        st.local.u32     [{{%r[0-9]+}}], {{%r[0-9]+}}
+; PTX32:        st.local.u32     [{{%r[0-9]+}}], {{%r[0-9]+}}
+; PTX64:        cvta.local.u64   %SP, %SPL;
+; PTX64:        add.u64          {{%rd[0-9]+}}, %SP, 0;
+; PTX64:        add.u64          {{%rd[0-9]+}}, %SPL, 0;
+; PTX64:        add.u64          {{%rd[0-9]+}}, %SP, 4;
+; PTX64:        add.u64          {{%rd[0-9]+}}, %SPL, 4;
+; PTX64:        st.local.u32     [{{%rd[0-9]+}}], {{%r[0-9]+}}
+; PTX64:        st.local.u32     [{{%rd[0-9]+}}], {{%r[0-9]+}}
+define void @foo4() {
+  %A = alloca i32
+  %B = alloca i32
+  store i32 0, i32* %A
+  store i32 0, i32* %B
+  call void @bar(i32* %A)
+  call void @bar(i32* %B)
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
index 53220bd..0de72c4 100644
--- a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
+++ b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
@@ -16,5 +16,16 @@ define void @kernel(float* %input, float* %output) {
   ret void
 }
 
-!nvvm.annotations = !{!0}
+define void @kernel2(float addrspace(1)* %input, float addrspace(1)* %output) {
+; CHECK-LABEL: .visible .entry kernel2(
+; CHECK-NOT: cvta.to.global.u64
+  %1 = load float, float addrspace(1)* %input, align 4
+; CHECK: ld.global.f32
+  store float %1, float addrspace(1)* %output, align 4
+; CHECK: st.global.f32
+  ret void
+}
+
+!nvvm.annotations = !{!0, !1}
 !0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
+!1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1}
diff --git a/test/CodeGen/PowerPC/builtins-ppc-elf2-abi.ll b/test/CodeGen/PowerPC/builtins-ppc-elf2-abi.ll
new file mode 100644
index 0000000..16dc2cc
--- /dev/null
+++ b/test/CodeGen/PowerPC/builtins-ppc-elf2-abi.ll
@@ -0,0 +1,165 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+@vda = common global <2 x double> zeroinitializer, align 16
+@vdb = common global <2 x double> zeroinitializer, align 16
+@vdr = common global <2 x double> zeroinitializer, align 16
+@vfa = common global <4 x float> zeroinitializer, align 16
+@vfb = common global <4 x float> zeroinitializer, align 16
+@vfr = common global <4 x float> zeroinitializer, align 16
+@vbllr = common global <2 x i64> zeroinitializer, align 16
+@vbir = common global <4 x i32> zeroinitializer, align 16
+@vblla = common global <2 x i64> zeroinitializer, align 16
+@vbllb = common global <2 x i64> zeroinitializer, align 16
+@vbia = common global <4 x i32> zeroinitializer, align 16
+@vbib = common global <4 x i32> zeroinitializer, align 16
+
+; Function Attrs: nounwind
+define void @test1() {
+entry:
+  %0 = load <2 x double>, <2 x double>* @vda, align 16
+  %1 = load <2 x double>, <2 x double>* @vdb, align 16
+  %2 = call <2 x double> @llvm.ppc.vsx.xvdivdp(<2 x double> %0, <2 x double> %1)
+  store <2 x double> %2, <2 x double>* @vdr, align 16
+  ret void
+; CHECK-LABEL: @test1
+; CHECK: xvdivdp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test2() {
+entry:
+  %0 = load <4 x float>, <4 x float>* @vfa, align 16
+  %1 = load <4 x float>, <4 x float>* @vfb, align 16
+  %2 = call <4 x float> @llvm.ppc.vsx.xvdivsp(<4 x float> %0, <4 x float> %1)
+  store <4 x float> %2, <4 x float>* @vfr, align 16
+  ret void
+; CHECK-LABEL: @test2
+; CHECK: xvdivsp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test3() {
+entry:
+  %0 = load <2 x double>, <2 x double>* @vda, align 16
+  %1 = load <2 x double>, <2 x double>* @vda, align 16
+  %2 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %1)
+  store <2 x double> %2, <2 x double>* @vdr, align 16
+  ret void
+; CHECK-LABEL: @test3
+; CHECK: xvrdpip {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test4() {
+entry:
+  %0 = load <4 x float>, <4 x float>* @vfa, align 16
+  %1 = load <4 x float>, <4 x float>* @vfa, align 16
+  %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %1)
+  store <4 x float> %2, <4 x float>* @vfr, align 16
+  ret void
+; CHECK-LABEL: @test4
+; CHECK: xvrspip {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test5() {
+entry:
+  %0 = load <2 x double>, <2 x double>* @vda, align 16
+  %1 = load <2 x double>, <2 x double>* @vdb, align 16
+  %2 = call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %0, <2 x double> %1)
+  store <2 x i64> %2, <2 x i64>* @vbllr, align 16
+  ret void
+; CHECK-LABEL: @test5
+; CHECK: xvcmpeqdp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test6() {
+entry:
+  %0 = load <4 x float>, <4 x float>* @vfa, align 16
+  %1 = load <4 x float>, <4 x float>* @vfb, align 16
+  %2 = call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %0, <4 x float> %1)
+  store <4 x i32> %2, <4 x i32>* @vbir, align 16
+  ret void
+; CHECK-LABEL: @test6
+; CHECK: xvcmpeqsp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test7() {
+entry:
+  %0 = load <2 x double>, <2 x double>* @vda, align 16
+  %1 = load <2 x double>, <2 x double>* @vdb, align 16
+  %2 = call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %0, <2 x double> %1)
+  store <2 x i64> %2, <2 x i64>* @vbllr, align 16
+  ret void
+; CHECK-LABEL: @test7
+; CHECK: xvcmpgedp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test8() {
+entry:
+  %0 = load <4 x float>, <4 x float>* @vfa, align 16
+  %1 = load <4 x float>, <4 x float>* @vfb, align 16
+  %2 = call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %0, <4 x float> %1)
+  store <4 x i32> %2, <4 x i32>* @vbir, align 16
+  ret void
+; CHECK-LABEL: @test8
+; CHECK: xvcmpgesp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test9() {
+entry:
+  %0 = load <2 x double>, <2 x double>* @vda, align 16
+  %1 = load <2 x double>, <2 x double>* @vdb, align 16
+  %2 = call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %0, <2 x double> %1)
+  store <2 x i64> %2, <2 x i64>* @vbllr, align 16
+  ret void
+; CHECK-LABEL: @test9
+; CHECK: xvcmpgtdp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define void @test10() {
+entry:
+  %0 = load <4 x float>, <4 x float>* @vfa, align 16
+  %1 = load <4 x float>, <4 x float>* @vfb, align 16
+  %2 = call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %0, <4 x float> %1)
+  store <4 x i32> %2, <4 x i32>* @vbir, align 16
+  ret void
+; CHECK-LABEL: @test10
+; CHECK: xvcmpgtsp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.xvdivdp(<2 x double>, <2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ppc.vsx.xvdivsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double>, <2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double>, <2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double>, <2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/PowerPC/lxvw4x-bug.ll b/test/CodeGen/PowerPC/lxvw4x-bug.ll
new file mode 100644
index 0000000..1f521a5
--- /dev/null
+++ b/test/CodeGen/PowerPC/lxvw4x-bug.ll
@@ -0,0 +1,25 @@
+; RUN: llc -O0 -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; Function Attrs: nounwind
+define void @test() {
+entry:
+  %__a.addr.i = alloca i32, align 4
+  %__b.addr.i = alloca <4 x i32>*, align 8
+  %i = alloca <4 x i32>, align 16
+  %j = alloca <4 x i32>, align 16
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %i, align 16
+  store i32 0, i32* %__a.addr.i, align 4
+  store <4 x i32>* %i, <4 x i32>** %__b.addr.i, align 8
+  %0 = load i32, i32* %__a.addr.i, align 4
+  %1 = load <4 x i32>*, <4 x i32>** %__b.addr.i, align 8
+  %2 = bitcast <4 x i32>* %1 to i8*
+  %3 = getelementptr i8, i8* %2, i32 %0
+  %4 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %3)
+; CHECK: lwa [[REG0:[0-9]+]],
+; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]]
+; CHECK: xxswapd [[REG1]], [[REG1]]
+  store <4 x i32> %4, <4 x i32>* %j, align 16
+  ret void
+}
+
+; Function Attrs: nounwind readonly
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
diff --git a/test/CodeGen/PowerPC/swaps-le-3.ll b/test/CodeGen/PowerPC/swaps-le-3.ll
new file mode 100644
index 0000000..0c1748d
--- /dev/null
+++ b/test/CodeGen/PowerPC/swaps-le-3.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+
+; This test verifies that VSX swap optimization works for the
+; doubleword splat idiom.
+
+@a = external global <2 x double>, align 16
+@b = external global <2 x double>, align 16
+
+define void @test(double %s) {
+entry:
+  %0 = insertelement <2 x double> undef, double %s, i32 0
+  %1 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
+  %2 = load <2 x double>, <2 x double>* @a, align 16
+  %3 = fadd <2 x double> %0, %2
+  store <2 x double> %3, <2 x double>* @b, align 16
+  ret void
+}
+
+; CHECK-LABEL: @test
+; CHECK: xxspltd
+; CHECK: lxvd2x
+; CHECK: xvadddp
+; CHECK: stxvd2x
+; CHECK-NOT: xxswapd
diff --git a/test/CodeGen/PowerPC/swaps-le-4.ll b/test/CodeGen/PowerPC/swaps-le-4.ll
new file mode 100644
index 0000000..7d8239b
--- /dev/null
+++ b/test/CodeGen/PowerPC/swaps-le-4.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+
+; This test verifies that VSX swap optimization works when an implicit
+; subregister is present (in this case, in the XXPERMDI associated with
+; the store).
+
+define void @bar() {
+entry:
+  %x = alloca <2 x i64>, align 16
+  %0 = bitcast <2 x i64>* %x to i8*
+  call void @llvm.lifetime.start(i64 16, i8* %0)
+  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %x, i64 0, i64 0
+  store <2 x i64> <i64 0, i64 1>, <2 x i64>* %x, align 16
+  call void @foo(i64* %arrayidx)
+  call void @llvm.lifetime.end(i64 16, i8* %0)
+  ret void
+}
+
+; CHECK-LABEL: @bar
+; CHECK: lxvd2x
+; CHECK: stxvd2x
+; CHECK-NOT: xxswapd
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @foo(i64*)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
diff --git a/test/CodeGen/PowerPC/vec_mergeow.ll b/test/CodeGen/PowerPC/vec_mergeow.ll
new file mode 100644
index 0000000..c7c7448
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_mergeow.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s  -check-prefix=CHECK-LE
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s -check-prefix=CHECK-BE
+
+; Check for a vector merge instruction using two inputs
+; The shufflevector specifies the even elements, using big endian element 
+; ordering. If run on a big endian machine, this should produce the vmrgew 
+; instruction. If run on a little endian machine, this should produce the
+; vmrgow instruction. Note also that on little endian the input registers 
+; are swapped also.
+define void @check_merge_even_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK-LE-LABEL: @check_merge_even_xy
+; CHECK-BE-LABEL: @check_merge_even_xy
+        %tmp = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, 
+	      		      <16 x i32> <i32 0, i32 1, i32 2, i32 3, 
+			      	    	  i32 16, i32 17, i32 18, i32 19, 
+					  i32 8, i32 9, i32 10, i32 11, 
+					  i32 24, i32 25, i32 26, i32 27>
+; CHECK-LE: vmrgow 2, 3, 2
+; CHECK-BE: vmrgew 2, 2, 3
+      	store <16 x i8> %tmp3, <16 x i8>* %A
+	ret void
+; CHECK-LE: blr
+; CHECK-BE: blr
+}
+
+; Check for a vector merge instruction using a single input. 
+; The shufflevector specifies the even elements, using big endian element 
+; ordering. If run on a big endian machine, this should produce the vmrgew 
+; instruction. If run on a little endian machine, this should produce the
+; vmrgow instruction. 
+define void @check_merge_even_xx(<16 x i8>* %A) {
+entry:
+; CHECK-LE-LABEL: @check_merge_even_xx
+; CHECK-BE-LABEL: @check_merge_even_xx
+        %tmp = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, 
+	      		      <16 x i32> <i32 0, i32 1, i32 2, i32 3, 
+			      	          i32 0, i32 1, i32 2, i32 3, 
+					  i32 8, i32 9, i32 10, i32 11, 
+					  i32 8, i32 9, i32 10, i32 11>
+; CHECK-LE: vmrgow 2, 2, 2
+; CHECK-BE: vmrgew 2, 2, 2
+  	store <16 x i8> %tmp2, <16 x i8>* %A
+	ret void
+; CHECK-LE: blr
+; CHECK-BE: blr       
+}
+
+; Check for a vector merge instruction using two inputs.
+; The shufflevector specifies the odd elements, using big endian element 
+; ordering. If run on a big endian machine, this should produce the vmrgow 
+; instruction. If run on a little endian machine, this should produce the
+; vmrgew instruction. Note also that on little endian the input registers 
+; are swapped also.
+define void @check_merge_odd_xy(<16 x i8>* %A, <16 x i8>* %B) {
+entry:
+; CHECK-LE-LABEL: @check_merge_odd_xy
+; CHECK-BE-LABEL: @check_merge_odd_xy
+        %tmp = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, 
+	      		      <16 x i32> <i32 4, i32 5, i32 6, i32 7, 
+			      	    	  i32 20, i32 21, i32 22, i32 23, 
+					  i32 12, i32 13, i32 14, i32 15, 
+					  i32 28, i32 29, i32 30, i32 31>
+; CHECK-LE: vmrgew 2, 3, 2
+; CHECK-BE: vmrgow 2, 2, 3
+        store <16 x i8> %tmp3, <16 x i8>* %A
+	ret void
+; CHECK-LE: blr
+; CHECK-BE: blr
+}
+
+; Check for a vector merge instruction using a single input.
+; The shufflevector specifies the odd elements, using big endian element 
+; ordering. If run on a big endian machine, this should produce the vmrgow 
+; instruction. If run on a little endian machine, this should produce the
+; vmrgew instruction. 
+define void @check_merge_odd_xx(<16 x i8>* %A) {
+entry:
+; CHECK-LE-LABEL: @check_merge_odd_xx
+; CHECK-BE-LABEL: @check_merge_odd_xx
+        %tmp = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, 
+	      		      <16 x i32> <i32 4, i32 5, i32 6, i32 7, 
+			      	    	  i32 4, i32 5, i32 6, i32 7, 
+					  i32 12, i32 13, i32 14, i32 15, 
+					  i32 12, i32 13, i32 14, i32 15>
+; CHECK-LE: vmrgew 2, 2, 2
+; CHECK-BE: vmrgow 2, 2, 2
+        store <16 x i8> %tmp2, <16 x i8>* %A
+	ret void
+; CHECK-LE: blr
+; CHECK-BE: blr
+}
+
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index b185fed..f85aceb 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -1,9 +1,8 @@
-; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
-; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
-; RUN: llc -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
-; RUN: llc -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
-target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-LE %s
 
 define double @test1(double %a, double %b) {
 entry:
@@ -13,6 +12,10 @@ entry:
 ; CHECK-LABEL: @test1
 ; CHECK: xsmuldp 1, 1, 2
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test1
+; CHECK-LE: xsmuldp 1, 1, 2
+; CHECK-LE: blr
 }
 
 define double @test2(double %a, double %b) {
@@ -23,6 +26,10 @@ entry:
 ; CHECK-LABEL: @test2
 ; CHECK: xsdivdp 1, 1, 2
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test2
+; CHECK-LE: xsdivdp 1, 1, 2
+; CHECK-LE: blr
 }
 
 define double @test3(double %a, double %b) {
@@ -33,6 +40,10 @@ entry:
 ; CHECK-LABEL: @test3
 ; CHECK: xsadddp 1, 1, 2
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test3
+; CHECK-LE: xsadddp 1, 1, 2
+; CHECK-LE: blr
 }
 
 define <2 x double> @test4(<2 x double> %a, <2 x double> %b) {
@@ -43,6 +54,10 @@ entry:
 ; CHECK-LABEL: @test4
 ; CHECK: xvadddp 34, 34, 35
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test4
+; CHECK-LE: xvadddp 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
@@ -60,6 +75,10 @@ entry:
 ; CHECK-FISL: xxlxor 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test5
+; CHECK-LE: xxlxor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
@@ -77,6 +96,10 @@ entry:
 ; CHECK-FISL: xxlxor 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test6
+; CHECK-LE: xxlxor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <16 x i8> @test7(<16 x i8> %a, <16 x i8> %b) {
@@ -94,6 +117,10 @@ entry:
 ; CHECK-FISL: xxlxor 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test7
+; CHECK-LE: xxlxor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
@@ -111,6 +138,10 @@ entry:
 ; CHECK-FISL: xxlor 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test8
+; CHECK-LE: xxlor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
@@ -128,6 +159,10 @@ entry:
 ; CHECK-FISL: xxlor 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test9
+; CHECK-LE: xxlor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <16 x i8> @test10(<16 x i8> %a, <16 x i8> %b) {
@@ -145,6 +180,10 @@ entry:
 ; CHECK-FISL: xxlor 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test10
+; CHECK-LE: xxlor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
@@ -162,6 +201,10 @@ entry:
 ; CHECK-FISL: xxland 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test11
+; CHECK-LE: xxland 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
@@ -179,6 +222,10 @@ entry:
 ; CHECK-FISL: xxland 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test12
+; CHECK-LE: xxland 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <16 x i8> @test13(<16 x i8> %a, <16 x i8> %b) {
@@ -196,6 +243,10 @@ entry:
 ; CHECK-FISL: xxland 36, 36, 37
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test13
+; CHECK-LE: xxland 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
@@ -221,6 +272,10 @@ entry:
 ; CHECK-FISL: ori 0, 0, 65520
 ; CHECK-FISL: stvx 0, 1, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test14
+; CHECK-LE: xxlnor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
@@ -246,6 +301,10 @@ entry:
 ; CHECK-FISL: ori 0, 0, 65520
 ; CHECK-FISL: stvx 0, 1, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test15
+; CHECK-LE: xxlnor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
@@ -271,6 +330,10 @@ entry:
 ; CHECK-FISL: ori 0, 0, 65520
 ; CHECK-FISL: stvx 0, 1, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test16
+; CHECK-LE: xxlnor 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
@@ -294,6 +357,10 @@ entry:
 ; CHECK-FISL: xxland 37, 37, 32
 ; CHECK-FISL: vor 2, 5, 5
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test17
+; CHECK-LE: xxlandc 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <8 x i16> @test18(<8 x i16> %a, <8 x i16> %b) {
@@ -320,6 +387,10 @@ entry:
 ; CHECK-FISL: ori 0, 0, 65520
 ; CHECK-FISL: stvx 4, 1, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test18
+; CHECK-LE: xxlandc 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <16 x i8> @test19(<16 x i8> %a, <16 x i8> %b) {
@@ -346,6 +417,10 @@ entry:
 ; CHECK-FISL: ori 0, 0, 65520
 ; CHECK-FISL: stvx 4, 1, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test19
+; CHECK-LE: xxlandc 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
@@ -367,6 +442,11 @@ entry:
 ; CHECK-FISL: xxsel 32, 32, 33, 38
 ; CHECK-FISL: vor 2, 0, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test20
+; CHECK-LE: vcmpequw {{[0-9]+}}, 4, 5
+; CHECK-LE: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-LE: blr
 }
 
 define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
@@ -389,6 +469,11 @@ entry:
 ; CHECK-FISL: xxsel 32, 38, 39, 32
 ; CHECK-FISL: vor 2, 0, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test21
+; CHECK-LE: xvcmpeqsp [[V1:[0-9]+]], 36, 37
+; CHECK-LE: xxsel 34, 35, 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
@@ -418,6 +503,17 @@ entry:
 ; CHECK-FISL-DAG: xxlor
 ; CHECK-FISL: xxsel 0, 38, 39, {{[0-9]+}}
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test22
+; CHECK-LE-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37
+; CHECK-LE-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36
+; CHECK-LE-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37
+; CHECK-LE-DAG: xxlnor
+; CHECK-LE-DAG: xxlnor
+; CHECK-LE-DAG: xxlor
+; CHECK-LE-DAG: xxlor
+; CHECK-LE: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-LE: blr
 }
 
 define <8 x i16> @test23(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
@@ -439,6 +535,11 @@ entry:
 ; CHECK-FISL: xxsel 32, 32, 33, 38
 ; CHECK-FISL: vor 2, 0, 
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test23
+; CHECK-LE: vcmpequh {{[0-9]+}}, 4, 5
+; CHECK-LE: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-LE: blr
 }
 
 define <16 x i8> @test24(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
@@ -460,6 +561,11 @@ entry:
 ; CHECK-FISL: xxsel 32, 32, 33, 38
 ; CHECK-FISL: vor 2, 0, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test24
+; CHECK-LE: vcmpequb {{[0-9]+}}, 4, 5
+; CHECK-LE: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-LE: blr
 }
 
 define <2 x double> @test25(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
@@ -472,6 +578,11 @@ entry:
 ; CHECK: xvcmpeqdp [[V1:[0-9]+]], 36, 37
 ; CHECK: xxsel 34, 35, 34, [[V1]]
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test25
+; CHECK-LE: xvcmpeqdp [[V1:[0-9]+]], 36, 37
+; CHECK-LE: xxsel 34, 35, 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define <2 x i64> @test26(<2 x i64> %a, <2 x i64> %b) {
@@ -489,6 +600,9 @@ define <2 x i64> @test26(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK: add
 ; CHECK: add
 ; CHECK: blr
+
+; CHECK-LE: vaddudm 2, 2, 3
+; CHECK-LE: blr
 }
 
 define <2 x i64> @test27(<2 x i64> %a, <2 x i64> %b) {
@@ -498,6 +612,10 @@ define <2 x i64> @test27(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @test27
 ; CHECK: xxland 34, 34, 35
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test27
+; CHECK-LE: xxland 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <2 x double> @test28(<2 x double>* %a) {
@@ -507,6 +625,11 @@ define <2 x double> @test28(<2 x double>* %a) {
 ; CHECK-LABEL: @test28
 ; CHECK: lxvd2x 34, 0, 3
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test28
+; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
+; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define void @test29(<2 x double>* %a, <2 x double> %b) {
@@ -516,6 +639,11 @@ define void @test29(<2 x double>* %a, <2 x double> %b) {
 ; CHECK-LABEL: @test29
 ; CHECK: stxvd2x 34, 0, 3
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test29
+; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
+; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE: blr
 }
 
 define <2 x double> @test28u(<2 x double>* %a) {
@@ -525,6 +653,11 @@ define <2 x double> @test28u(<2 x double>* %a) {
 ; CHECK-LABEL: @test28u
 ; CHECK: lxvd2x 34, 0, 3
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test28u
+; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
+; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define void @test29u(<2 x double>* %a, <2 x double> %b) {
@@ -534,6 +667,11 @@ define void @test29u(<2 x double>* %a, <2 x double> %b) {
 ; CHECK-LABEL: @test29u
 ; CHECK: stxvd2x 34, 0, 3
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test29u
+; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
+; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE: blr
 }
 
 define <2 x i64> @test30(<2 x i64>* %a) {
@@ -550,6 +688,11 @@ define <2 x i64> @test30(<2 x i64>* %a) {
 ; CHECK-FISL: vor 3, 2, 2
 ; CHECK-FISL: vor 2, 3, 3
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test30
+; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
+; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define void @test31(<2 x i64>* %a, <2 x i64> %b) {
@@ -559,6 +702,11 @@ define void @test31(<2 x i64>* %a, <2 x i64> %b) {
 ; CHECK-LABEL: @test31
 ; CHECK: stxvd2x 34, 0, 3
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test31
+; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
+; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE: blr
 }
 
 define <4 x float> @test32(<4 x float>* %a) {
@@ -573,6 +721,11 @@ define <4 x float> @test32(<4 x float>* %a) {
 ; CHECK-FISL: lxvw4x 0, 0, 3
 ; CHECK-FISL: xxlor 34, 0, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test32
+; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
+; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define void @test33(<4 x float>* %a, <4 x float> %b) {
@@ -587,6 +740,11 @@ define void @test33(<4 x float>* %a, <4 x float> %b) {
 ; CHECK-FISL: vor 3, 2, 2
 ; CHECK-FISL: stxvw4x 35, 0, 3
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test33
+; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
+; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE: blr
 }
 
 define <4 x float> @test32u(<4 x float>* %a) {
@@ -599,6 +757,11 @@ define <4 x float> @test32u(<4 x float>* %a) {
 ; CHECK-DAG: lvx
 ; CHECK: vperm 2,
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test32u
+; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
+; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define void @test33u(<4 x float>* %a, <4 x float> %b) {
@@ -613,6 +776,11 @@ define void @test33u(<4 x float>* %a, <4 x float> %b) {
 ; CHECK-FISL: vor 3, 2, 2
 ; CHECK-FISL: stxvw4x 35, 0, 3
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test33u
+; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
+; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE: blr
 }
 
 define <4 x i32> @test34(<4 x i32>* %a) {
@@ -629,6 +797,11 @@ define <4 x i32> @test34(<4 x i32>* %a) {
 ; CHECK-FISL: vor 3, 2, 2
 ; CHECK-FISL: vor 2, 3, 3
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test34
+; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
+; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define void @test35(<4 x i32>* %a, <4 x i32> %b) {
@@ -643,6 +816,11 @@ define void @test35(<4 x i32>* %a, <4 x i32> %b) {
 ; CHECK-FISL: vor 3, 2, 2
 ; CHECK-FISL: stxvw4x 35, 0, 3
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test35
+; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
+; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE: blr
 }
 
 define <2 x double> @test40(<2 x i64> %a) {
@@ -652,6 +830,10 @@ define <2 x double> @test40(<2 x i64> %a) {
 ; CHECK-LABEL: @test40
 ; CHECK: xvcvuxddp 34, 34
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test40
+; CHECK-LE: xvcvuxddp 34, 34
+; CHECK-LE: blr
 }
 
 define <2 x double> @test41(<2 x i64> %a) {
@@ -661,6 +843,10 @@ define <2 x double> @test41(<2 x i64> %a) {
 ; CHECK-LABEL: @test41
 ; CHECK: xvcvsxddp 34, 34
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test41
+; CHECK-LE: xvcvsxddp 34, 34
+; CHECK-LE: blr
 }
 
 define <2 x i64> @test42(<2 x double> %a) {
@@ -670,6 +856,10 @@ define <2 x i64> @test42(<2 x double> %a) {
 ; CHECK-LABEL: @test42
 ; CHECK: xvcvdpuxds 34, 34
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test42
+; CHECK-LE: xvcvdpuxds 34, 34
+; CHECK-LE: blr
 }
 
 define <2 x i64> @test43(<2 x double> %a) {
@@ -679,6 +869,10 @@ define <2 x i64> @test43(<2 x double> %a) {
 ; CHECK-LABEL: @test43
 ; CHECK: xvcvdpsxds 34, 34
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test43
+; CHECK-LE: xvcvdpsxds 34, 34
+; CHECK-LE: blr
 }
 
 define <2 x float> @test44(<2 x i64> %a) {
@@ -726,6 +920,10 @@ define <2 x double> @test50(double* %a) {
 ; CHECK-LABEL: @test50
 ; CHECK: lxvdsx 34, 0, 3
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test50
+; CHECK-LE: lxvdsx 34, 0, 3
+; CHECK-LE: blr
 }
 
 define <2 x double> @test51(<2 x double> %a, <2 x double> %b) {
@@ -735,6 +933,10 @@ define <2 x double> @test51(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test51
 ; CHECK: xxspltd 34, 34, 0
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test51
+; CHECK-LE: xxspltd 34, 34, 1
+; CHECK-LE: blr
 }
 
 define <2 x double> @test52(<2 x double> %a, <2 x double> %b) {
@@ -744,6 +946,10 @@ define <2 x double> @test52(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test52
 ; CHECK: xxmrghd 34, 34, 35
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test52
+; CHECK-LE: xxmrgld 34, 35, 34
+; CHECK-LE: blr
 }
 
 define <2 x double> @test53(<2 x double> %a, <2 x double> %b) {
@@ -753,6 +959,10 @@ define <2 x double> @test53(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test53
 ; CHECK: xxmrghd 34, 35, 34
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test53
+; CHECK-LE: xxmrgld 34, 34, 35
+; CHECK-LE: blr
 }
 
 define <2 x double> @test54(<2 x double> %a, <2 x double> %b) {
@@ -762,6 +972,10 @@ define <2 x double> @test54(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test54
 ; CHECK: xxpermdi 34, 34, 35, 2
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test54
+; CHECK-LE: xxpermdi 34, 35, 34, 2
+; CHECK-LE: blr
 }
 
 define <2 x double> @test55(<2 x double> %a, <2 x double> %b) {
@@ -771,6 +985,10 @@ define <2 x double> @test55(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @test55
 ; CHECK: xxmrgld 34, 34, 35
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test55
+; CHECK-LE: xxmrghd 34, 35, 34
+; CHECK-LE: blr
 }
 
 define <2 x i64> @test56(<2 x i64> %a, <2 x i64> %b) {
@@ -780,6 +998,10 @@ define <2 x i64> @test56(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @test56
 ; CHECK: xxmrgld 34, 34, 35
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test56
+; CHECK-LE: xxmrghd 34, 35, 34
+; CHECK-LE: blr
 }
 
 define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) {
@@ -836,6 +1058,10 @@ define double @test63(<2 x double> %a) {
 ; CHECK-FISL: xxlor 0, 34, 34
 ; CHECK-FISL: fmr 1, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test63
+; CHECK-LE: xxswapd 1, 34
+; CHECK-LE: blr
 }
 
 define double @test64(<2 x double> %a) {
@@ -851,6 +1077,9 @@ define double @test64(<2 x double> %a) {
 ; CHECK-FISL: xxlor 0, 34, 34
 ; CHECK-FISL: fmr 1, 0
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test64
+; CHECK-LE: xxlor 1, 34, 34
 }
 
 define <2 x i1> @test65(<2 x i64> %a, <2 x i64> %b) {
@@ -867,6 +1096,10 @@ define <2 x i1> @test65(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-FISL: vcmpequw 4, 5, 4
 ; CHECK-FISL: vor 2, 4, 4
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test65
+; CHECK-LE: vcmpequd 2, 2, 3
+; CHECK-LE: blr
 }
 
 define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
@@ -882,6 +1115,11 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-FISL: vcmpequw {{[0-9]+}}, 5, 4
 ; CHECK-FISL: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test66
+; CHECK-LE: vcmpequd {{[0-9]+}}, 2, 3
+; CHECK-LE: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: blr
 }
 
 define <2 x i1> @test67(<2 x i64> %a, <2 x i64> %b) {
@@ -896,6 +1134,10 @@ define <2 x i1> @test67(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK: cmpld
 ; CHECK: lxvd2x
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test67
+; CHECK-LE: vcmpgtud 2, 3, 2
+; CHECK-LE: blr
 }
 
 define <2 x double> @test68(<2 x i32> %a) {
@@ -906,6 +1148,11 @@ define <2 x double> @test68(<2 x i32> %a) {
 ; CHECK: xxsldwi [[V1:[0-9]+]], 34, 34, 1
 ; CHECK: xvcvsxwdp 34, [[V1]]
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test68
+; CHECK-LE: xxsldwi [[V1:[0-9]+]], 34, 34, 1
+; CHECK-LE: xvcvsxwdp 34, [[V1]]
+; CHECK-LE: blr
 }
 
 define <2 x double> @test69(<2 x i16> %a) {
@@ -920,6 +1167,15 @@ define <2 x double> @test69(<2 x i16> %a) {
 ; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
 ; CHECK: xvcvsxwdp 34, [[V4]]
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test69
+; CHECK-LE: vspltisw [[V1:[0-9]+]], 8
+; CHECK-LE: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
+; CHECK-LE: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
+; CHECK-LE: vsraw {{[0-9]+}}, [[V3]], [[V2]]
+; CHECK-LE: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-LE: xvcvsxwdp 34, [[V4]]
+; CHECK-LE: blr
 }
 
 define <2 x double> @test70(<2 x i8> %a) {
@@ -934,6 +1190,15 @@ define <2 x double> @test70(<2 x i8> %a) {
 ; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
 ; CHECK: xvcvsxwdp 34, [[V4]]
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test70
+; CHECK-LE: vspltisw [[V1:[0-9]+]], 12
+; CHECK-LE: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
+; CHECK-LE: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
+; CHECK-LE: vsraw {{[0-9]+}}, [[V3]], [[V2]]
+; CHECK-LE: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
+; CHECK-LE: xvcvsxwdp 34, [[V4]]
+; CHECK-LE: blr
 }
 
 define <2 x i32> @test80(i32 %v) {
@@ -960,6 +1225,16 @@ define <2 x i32> @test80(i32 %v) {
 ; CHECK-FISL-DAG: std [[R3]], -16(1)
 ; CHECK-FISL-DAG: lxvd2x 0, 0, [[R2]]
 ; CHECK-FISL: blr
+
+; CHECK-LE-LABEL: @test80
+; CHECK-LE-DAG: addi [[R1:[0-9]+]], 1, -16
+; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
+; CHECK-LE-DAG: lxvd2x [[V1:[0-9]+]], 0, [[R1]]
+; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
+; CHECK-LE-DAG: xxswapd 34, [[V1]]
+; CHECK-LE-DAG: xxswapd 35, [[V2]]
+; CHECK-LE: vaddudm 2, 2, 3
+; CHECK-LE: blr
 }
 
 define <2 x double> @test81(<4 x float> %b) {
@@ -968,6 +1243,9 @@ define <2 x double> @test81(<4 x float> %b) {
 
 ; CHECK-LABEL: @test81
 ; CHECK: blr
+
+; CHECK-LE-LABEL: @test81
+; CHECK-LE: blr
 }
 
 define double @test82(double %a, double %b, double %c, double %d) {
@@ -983,4 +1261,8 @@ entry:
 ; CHECK-FISL-LABEL: @test82
 ; CHECK-FISL: xscmpudp [[REG:[0-9]+]], 3, 4
 ; CHECK-FISL: beq [[REG]], {{.*}}
+
+; CHECK-LE-LABEL: @test82
+; CHECK-LE: xscmpudp [[REG:[0-9]+]], 3, 4
+; CHECK-LE: beqlr [[REG]]
 }
diff --git a/test/CodeGen/Thumb2/float-ops.ll b/test/CodeGen/Thumb2/float-ops.ll
index 4c42908..c9f93f2 100644
--- a/test/CodeGen/Thumb2/float-ops.ll
+++ b/test/CodeGen/Thumb2/float-ops.ll
@@ -109,7 +109,7 @@ entry:
 define double @load_d(double* %a) {
 entry:
 ; CHECK-LABEL: load_d:
-; NONE: ldrd r0, r1, [r0]
+; NONE: ldm r0, {r0, r1}
 ; HARD: vldr d0, [r0]
   %0 = load double, double* %a, align 8
   ret double %0
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch.ll b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
index c7a829a..02cc682 100644
--- a/test/CodeGen/WinEH/cppeh-prepared-catch.ll
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
@@ -61,7 +61,7 @@ entry:
   %.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 1)
   %2 = bitcast i8* %.i8 to double*
   %3 = bitcast double* %2 to i8*
-  invoke void (...) @llvm.donothing()
+  invoke void () @llvm.donothing()
           to label %done unwind label %lpad
 
 done:
@@ -201,7 +201,7 @@ declare void @llvm.frameescape(...) #3
 ; Function Attrs: nounwind readnone
 declare i8* @llvm.framerecover(i8*, i8*, i32) #2
 
-declare void @llvm.donothing(...)
+declare void @llvm.donothing()
 
 attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?f@@YAXXZ" }
 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll
index 414ccf4..634f66a 100644
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -1,9 +1,10 @@
-; RUN: llc -mcpu=corei7 -no-stack-coloring=false < %s | FileCheck %s --check-prefix=YESCOLOR
-; RUN: llc -mcpu=corei7 -no-stack-coloring=true  < %s | FileCheck %s --check-prefix=NOCOLOR
+; RUN: llc -mcpu=corei7 -no-stack-coloring=false < %s | FileCheck %s --check-prefix=YESCOLOR --check-prefix=CHECK
+; RUN: llc -mcpu=corei7 -no-stack-coloring=true  < %s | FileCheck %s --check-prefix=NOCOLOR --check-prefix=CHECK
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
+;CHECK-LABEL: myCall_w2:
 ;YESCOLOR: subq  $144, %rsp
 ;NOCOLOR: subq  $272, %rsp
 
@@ -28,6 +29,7 @@ entry:
 }
 
 
+;CHECK-LABEL: myCall2_no_merge
 ;YESCOLOR: subq  $272, %rsp
 ;NOCOLOR: subq  $272, %rsp
 
@@ -56,6 +58,7 @@ bb3:
   ret i32 0
 }
 
+;CHECK-LABEL: myCall2_w2
 ;YESCOLOR: subq  $144, %rsp
 ;NOCOLOR: subq  $272, %rsp
 
@@ -82,12 +85,11 @@ bb2:
 bb3:
   ret i32 0
 }
+
+;CHECK-LABEL: myCall_w4:
 ;YESCOLOR: subq  $200, %rsp
 ;NOCOLOR: subq  $408, %rsp
 
-
-
-
 define i32 @myCall_w4(i32 %in) {
 entry:
   %a1 = alloca [14 x i8*], align 8
@@ -119,6 +121,7 @@ entry:
   ret i32 %t7
 }
 
+;CHECK-LABEL: myCall2_w4:
 ;YESCOLOR: subq  $112, %rsp
 ;NOCOLOR: subq  $400, %rsp
 
@@ -158,6 +161,7 @@ bb3:
 }
 
 
+;CHECK-LABEL: myCall2_noend:
 ;YESCOLOR: subq  $144, %rsp
 ;NOCOLOR: subq  $272, %rsp
 
@@ -185,6 +189,7 @@ bb3:
   ret i32 0
 }
 
+;CHECK-LABEL: myCall2_noend2:
 ;YESCOLOR: subq  $144, %rsp
 ;NOCOLOR: subq  $272, %rsp
 define i32 @myCall2_noend2(i32 %in, i1 %d) {
@@ -211,6 +216,7 @@ bb3:
 }
 
 
+;CHECK-LABEL: myCall2_nostart:
 ;YESCOLOR: subq  $144, %rsp
 ;NOCOLOR: subq  $272, %rsp
 define i32 @myCall2_nostart(i32 %in, i1 %d) {
@@ -236,6 +242,7 @@ bb3:
 }
 
 ; Adopt the test from Transforms/Inline/array_merge.ll'
+;CHECK-LABEL: array_merge:
 ;YESCOLOR: subq  $816, %rsp
 ;NOCOLOR: subq  $1616, %rsp
 define void @array_merge() nounwind ssp {
@@ -261,6 +268,7 @@ entry:
   ret void
 }
 
+;CHECK-LABEL: func_phi_lifetime:
 ;YESCOLOR: subq  $272, %rsp
 ;NOCOLOR: subq  $272, %rsp
 define i32 @func_phi_lifetime(i32 %in, i1 %d) {
@@ -297,8 +305,7 @@ bb3:
 }
 
 
-;YESCOLOR-LABEL: multi_region_bb:
-;NOCOLOR-LABEL: multi_region_bb:
+;CHECK-LABEL: multi_region_bb:
 define void @multi_region_bb() nounwind ssp {
 entry:
   %A.i1 = alloca [100 x i32], align 4
@@ -323,10 +330,9 @@ entry:
   call void @llvm.lifetime.end(i64 -1, i8* %3) nounwind
   ret void
 }
-
-
 ;YESCOLOR: subq  $272, %rsp
 ;NOCOLOR: subq  $272, %rsp
+
 define i32 @myCall_end_before_begin(i32 %in, i1 %d) {
 entry:
   %a = alloca [17 x i8*], align 8
@@ -353,9 +359,8 @@ bb3:
 
 ; Regression test for PR15707.  %buf1 and %buf2 should not be merged
 ; in this test case.
-;YESCOLOR-LABEL: myCall_pr15707:
+;CHECK-LABEL: myCall_pr15707:
 ;YESCOLOR: subq $200008, %rsp
-;NOCOLOR-LABEL: myCall_pr15707:
 ;NOCOLOR: subq $200008, %rsp
 define void @myCall_pr15707() {
   %buf1 = alloca i8, i32 100000, align 16
@@ -374,8 +379,7 @@ define void @myCall_pr15707() {
 
 ; Check that we don't assert and crash even when there are allocas
 ; outside the declared lifetime regions.
-;YESCOLOR-LABEL: bad_range:
-;NOCOLOR-LABEL:  bad_range:
+;CHECK-LABEL: bad_range:
 define void @bad_range() nounwind ssp {
 entry:
   %A.i1 = alloca [100 x i32], align 4
@@ -400,8 +404,7 @@ block2:
 
 ; Check that we don't assert and crash even when there are usages
 ; of allocas which do not read or write outside the declared lifetime regions.
-;YESCOLOR-LABEL: shady_range:
-;NOCOLOR-LABEL:  shady_range:
+;CHECK-LABEL: shady_range:
 
 %struct.Klass = type { i32, i32 }
 
diff --git a/test/CodeGen/X86/asm-mismatched-types.ll b/test/CodeGen/X86/asm-mismatched-types.ll
new file mode 100644
index 0000000..97f9c08
--- /dev/null
+++ b/test/CodeGen/X86/asm-mismatched-types.ll
@@ -0,0 +1,135 @@
+; RUN: llc -o - %s -no-integrated-as | FileCheck %s
+target triple = "x86_64--"
+
+; Allow to specify any of the 8/16/32/64 register names interchangeably in
+; constraints
+
+; Produced by C-programs like this:
+; void foo(int p) { register int reg __asm__("r8") = p;
+; __asm__ __volatile__("# REG: %0" : : "r" (reg)); }
+
+; CHECK-LABEL: reg64_as_32:
+; CHECK: # REG: %r8d
+define void @reg64_as_32(i32 %p) {
+  call void asm sideeffect "# REG: $0", "{r8}"(i32 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg64_as_32_float:
+; CHECK: # REG: %r8d
+define void @reg64_as_32_float(float %p) {
+  call void asm sideeffect "# REG: $0", "{r8}"(float %p)
+  ret void
+}
+
+; CHECK-LABEL: reg64_as_16:
+; CHECK: # REG: %r9w
+define void @reg64_as_16(i16 %p) {
+  call void asm sideeffect "# REG: $0", "{r9}"(i16 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg64_as_8:
+; CHECK: # REG: %bpl
+define void @reg64_as_8(i8 %p) {
+  call void asm sideeffect "# REG: $0", "{rbp}"(i8 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg32_as_16:
+; CHECK: # REG: %r15w
+define void @reg32_as_16(i16 %p) {
+  call void asm sideeffect "# REG: $0", "{r15d}"(i16 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg32_as_8:
+; CHECK: # REG: %r12b
+define void @reg32_as_8(i8 %p) {
+  call void asm sideeffect "# REG: $0", "{r12d}"(i8 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg16_as_8:
+; CHECK: # REG: %cl
+define void @reg16_as_8(i8 %p) {
+  call void asm sideeffect "# REG: $0", "{cx}"(i8 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg32_as_64:
+; CHECK: # REG: %rbp
+define void @reg32_as_64(i64 %p) {
+  call void asm sideeffect "# REG: $0", "{ebp}"(i64 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg32_as_64_float:
+; CHECK: # REG: %rbp
+define void @reg32_as_64_float(double %p) {
+  call void asm sideeffect "# REG: $0", "{ebp}"(double %p)
+  ret void
+}
+
+; CHECK-LABEL: reg16_as_64:
+; CHECK: # REG: %r13
+define void @reg16_as_64(i64 %p) {
+  call void asm sideeffect "# REG: $0", "{r13w}"(i64 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg16_as_64_float:
+; CHECK: # REG: %r13
+define void @reg16_as_64_float(double %p) {
+  call void asm sideeffect "# REG: $0", "{r13w}"(double %p)
+  ret void
+}
+
+; CHECK-LABEL: reg8_as_64:
+; CHECK: # REG: %rax
+define void @reg8_as_64(i64 %p) {
+  call void asm sideeffect "# REG: $0", "{al}"(i64 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg8_as_64_float:
+; CHECK: # REG: %rax
+define void @reg8_as_64_float(double %p) {
+  call void asm sideeffect "# REG: $0", "{al}"(double %p)
+  ret void
+}
+
+; CHECK-LABEL: reg16_as_32:
+; CHECK: # REG: %r11d
+define void @reg16_as_32(i32 %p) {
+  call void asm sideeffect "# REG: $0", "{r11w}"(i32 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg16_as_32_float:
+; CHECK: # REG: %r11d
+define void @reg16_as_32_float(float %p) {
+  call void asm sideeffect "# REG: $0", "{r11w}"(float %p)
+  ret void
+}
+
+; CHECK-LABEL: reg8_as_32:
+; CHECK: # REG: %r9d
+define void @reg8_as_32(i32 %p) {
+  call void asm sideeffect "# REG: $0", "{r9b}"(i32 %p)
+  ret void
+}
+
+; CHECK-LABEL: reg8_as_32_float:
+; CHECK: # REG: %r9d
+define void @reg8_as_32_float(float %p) {
+  call void asm sideeffect "# REG: $0", "{r9b}"(float %p)
+  ret void
+}
+
+; CHECK-LABEL: reg8_as_16:
+; CHECK: # REG: %di
+define void @reg8_as_16(i16 %p) {
+  call void asm sideeffect "# REG: $0", "{dil}"(i16 %p)
+  ret void
+}
diff --git a/test/CodeGen/X86/asm-reject-reg-type-mismatch.ll b/test/CodeGen/X86/asm-reject-reg-type-mismatch.ll
index 016e2d2..c7e86f5 100644
--- a/test/CodeGen/X86/asm-reject-reg-type-mismatch.ll
+++ b/test/CodeGen/X86/asm-reject-reg-type-mismatch.ll
@@ -1,10 +1,8 @@
-; RUN: not llc -no-integrated-as %s -o - 2> %t1
-; RUN: FileCheck %s < %t1
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+; RUN: not llc -o /dev/null %s 2>&1 | FileCheck %s
 target triple = "x86_64--"
 
 ; CHECK: error: couldn't allocate output register for constraint '{ax}'
 define i128 @blup() {
-  %v = tail call i128 asm "", "={ax},0,~{dirflag},~{fpsr},~{flags}"(i128 0)
+  %v = tail call i128 asm "", "={ax},0"(i128 0)
   ret i128 %v
 }
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll
index e70d9f3..e5373c5 100644
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -1,15 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-define <16 x i32> @test1(i32* %x) {
-; CHECK-LABEL: test1:
-; CHECK:    vmovd (%rdi), %xmm
-; CHECK:    vmovdqa32
-; CHECK:    vpermt2d %zmm
-   %y = load i32, i32* %x, align 4
-   %res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
-   ret <16 x i32>%res
-}
-
 define <16 x i32> @test2(<16 x i32> %x) {
 ; CHECK-LABEL: test2:
 ; CHECK:       ## BB#0:
diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll
index 9814a61..c30fc90 100644
--- a/test/CodeGen/X86/avx512-fma-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -1,422 +1,675 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s
 
-declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-
-define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
-  ; CHECK-LABEL: test_x86_vfmsubpd_z
-  ; CHECK: vfmsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
-  ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
-
-define <8 x double> @test_mask_vfmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsub_pd
-  ; CHECK: vfmsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
-  ret <8 x double> %res
-}
+declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
 
 define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfnmadd_ps_z
   ; CHECK: vfnmadd213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
 
 define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
   ; CHECK-LABEL: test_mask_vfnmadd_ps
   ; CHECK: vfnmadd213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
 
 define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfnmadd_pd_z
   ; CHECK: vfnmadd213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
 
 define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmadd_pd
   ; CHECK: vfnmadd213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
 
 define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfnmsubps_z
   ; CHECK: vfnmsub213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
 
 define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
   ; CHECK-LABEL: test_mask_vfnmsub_ps
   ; CHECK: vfnmsub213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
 
 define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfnmsubpd_z
   ; CHECK: vfnmsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
 
 define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmsub_pd
   ; CHECK: vfnmsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
 
 define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfmaddsubps_z
   ; CHECK: vfmaddsub213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
 ; CHECK-LABEL: test_mask_fmaddsub_ps:
 ; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
   ret <16 x float> %res
 }
 
-declare <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
 
 define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfmaddsubpd_z
   ; CHECK: vfmaddsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
 
 define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmaddsub_pd
   ; CHECK: vfmaddsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
-  ret <8 x double> %res
-}
-
-define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-  ; CHECK-LABEL: test_x86_vfmsubaddps_z
-  ; CHECK: vfmsubadd213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
-  ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_mask_vfmsubadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubadd_ps
-  ; CHECK: vfmsubadd213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
-  ret <16 x float> %res
-}
-
-define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
-  ; CHECK-LABEL: test_x86_vfmsubaddpd_z
-  ; CHECK: vfmsubadd213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
 
-define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubadd_pd
-  ; CHECK: vfmsubadd213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
-  ret <8 x double> %res
+define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
   ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
   ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
   ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
   ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
   ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
   ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
   ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
   ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
   ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
   ret <16 x float> %res
 }
 
 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
   ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
-  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
-  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
-  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
-  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
-  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
-  ; CHECK: vfmsub213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
-  ; CHECK: vfmsub213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
-  ; CHECK: vfmsub213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
-  ret <16 x float> %res
-}
-
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
-  ; CHECK: vfmsub213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
   ret <16 x float> %res
 }
 
-define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-  ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
-  ; CHECK: vfmsub213ps  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
-  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
-  ret <16 x float> %res
+declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
   ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
   ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
   ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
   ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
   ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
   ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
   ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
   ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
   ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
   ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
   ret <8 x double> %res
 }
 
+define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
   ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
   ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
   ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
   ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
   ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
   ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
   ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
   ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
   ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
   ret <8 x double> %res
 }
 
 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
   ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
-  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
   ret <8 x double> %res
 }
+
+define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
diff --git a/test/CodeGen/X86/avx512-fma.ll b/test/CodeGen/X86/avx512-fma.ll
index d6926e2..ed046de 100644
--- a/test/CodeGen/X86/avx512-fma.ll
+++ b/test/CodeGen/X86/avx512-fma.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX
 
 ; CHECK-LABEL: test_x86_fmadd_ps_z
 ; CHECK: vfmadd213ps     %zmm2, %zmm1, %zmm0
@@ -58,26 +59,129 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
   ret <8 x double> %res
 }
 
-define double @test_x86_fmsub_sd_z(double %a0, double %a1, double %a2) {
+define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
+; CHECK-LABEL: test_x86_fmsub_213:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %x = fmul double %a0, %a1
   %res = fsub double %x, %a2
   ret double %res
 }
 
-;CHECK-LABEL: test132_br
-;CHECK: vfmadd132ps  LCP{{.*}}(%rip){1to16}
-;CHECK: ret
-define <16 x float> @test132_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
+; CHECK-LABEL: test_x86_fmsub_213_m:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vfmsub213sd (%rdi), %xmm0, %xmm1
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %a2 = load double , double *%a2_ptr
+  %x = fmul double %a0, %a1
+  %res = fsub double %x, %a2
+  ret double %res
+}
+
+define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
+; CHECK-LABEL: test_x86_fmsub_231_m:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vfmsub231sd (%rdi), %xmm0, %xmm1
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %a2 = load double , double *%a2_ptr
+  %x = fmul double %a0, %a2
+  %res = fsub double %x, %a1
+  ret double %res
+}
+
+define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+; CHECK-LABEL: test231_br:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
   %b2 = fadd <16 x float> %b1, %a2
   ret <16 x float> %b2
 }
 
-;CHECK-LABEL: test213_br
-;CHECK: vfmadd213ps  LCP{{.*}}(%rip){1to16}
-;CHECK: ret
 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
+; CHECK-LABEL: test213_br:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %b1 = fmul <16 x float> %a1, %a2
   %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
   ret <16 x float> %b2
 }
+
+;mask (a*c+b , a)
+define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
+; CHECK-LABEL: test_x86_fmadd132_ps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmovsxbd %xmm2, %zmm2
+; CHECK-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
+; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; CHECK-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT:    retq
+;
+; SKX-LABEL: test_x86_fmadd132_ps:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpmovb2m %xmm2, %k1
+; SKX-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
+; SKX-NEXT:    retq
+  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
+  %x = fmul <16 x float> %a0, %a2
+  %y = fadd <16 x float> %x, %a1
+  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
+  ret <16 x float> %res
+}
+
+;mask (a*c+b , b)
+define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
+; CHECK-LABEL: test_x86_fmadd231_ps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmovsxbd %xmm2, %zmm2
+; CHECK-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
+; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; CHECK-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+;
+; SKX-LABEL: test_x86_fmadd231_ps:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpmovb2m %xmm2, %k1
+; SKX-NEXT:    vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
+; SKX-NEXT:    vmovaps %zmm1, %zmm0
+; SKX-NEXT:    retq
+  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
+  %x = fmul <16 x float> %a0, %a2
+  %y = fadd <16 x float> %x, %a1
+  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
+  ret <16 x float> %res
+}
+
+;mask (b*a+c , b)
+define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
+; CHECK-LABEL: test_x86_fmadd213_ps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmovsxbd %xmm2, %zmm2
+; CHECK-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
+; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
+; CHECK-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+;
+; SKX-LABEL: test_x86_fmadd213_ps:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpmovb2m %xmm2, %k1
+; SKX-NEXT:    vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
+; SKX-NEXT:    vmovaps %zmm1, %zmm0
+; SKX-NEXT:    retq
+  %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
+  %x = fmul <16 x float> %a1, %a0
+  %y = fadd <16 x float> %x, %a2
+  %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
+  ret <16 x float> %res
+}
+
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index 0e32a1c..3fca5a8 100644
--- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
 
 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
@@ -10,52 +10,60 @@ declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>,
 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
 
-;CHECK-LABEL: gather_mask_dps
-;CHECK: kmovw
-;CHECK: vgatherdps
-;CHECK: vpadd
-;CHECK: vscatterdps
-;CHECK: ret
 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_dps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
   %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_dpd
-;CHECK: kmovw
-;CHECK: vgatherdpd
-;CHECK: vpadd
-;CHECK: vscatterdpd
-;CHECK: ret
 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_dpd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
+; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT:    vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qps
-;CHECK: kmovw
-;CHECK: vgatherqps
-;CHECK: vpadd
-;CHECK: vscatterqps
-;CHECK: ret
 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_qps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qpd
-;CHECK: kmovw
-;CHECK: vgatherqpd
-;CHECK: vpadd
-;CHECK: vscatterqpd
-;CHECK: ret
 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_qpd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
@@ -74,162 +82,710 @@ declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i3
 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
 
-;CHECK-LABEL: gather_mask_dd
-;CHECK: kmovw
-;CHECK: vpgatherdd
-;CHECK: vpadd
-;CHECK: vpscatterdd
-;CHECK: ret
 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_dd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
   %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qd
-;CHECK: kmovw
-;CHECK: vpgatherqd
-;CHECK: vpadd
-;CHECK: vpscatterqd
-;CHECK: ret
 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_qd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qq
-;CHECK: kmovw
-;CHECK: vpgatherqq
-;CHECK: vpadd
-;CHECK: vpscatterqq
-;CHECK: ret
 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_qq:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_dq
-;CHECK: kmovw
-;CHECK: vpgatherdq
-;CHECK: vpadd
-;CHECK: vpscatterdq
-;CHECK: ret
 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_mask_dq:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
+; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT:    vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
   ret void
 }
 
-
-;CHECK-LABEL: gather_mask_dpd_execdomain
-;CHECK: vgatherdpd
-;CHECK: vmovapd
-;CHECK: ret
 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf)  {
+; CHECK-LABEL: gather_mask_dpd_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
+; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
+; CHECK-NEXT:    retq
   %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
   store <8 x double> %x, <8 x double>* %stbuf
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qpd_execdomain
-;CHECK: vgatherqpd
-;CHECK: vmovapd
-;CHECK: ret
 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf)  {
+; CHECK-LABEL: gather_mask_qpd_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
+; CHECK-NEXT:    retq
   %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   store <8 x double> %x, <8 x double>* %stbuf
   ret void
 }
 
-;CHECK-LABEL: gather_mask_dps_execdomain
-;CHECK: vgatherdps
-;CHECK: vmovaps 
-;CHECK: ret
 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base)  {
+; CHECK-LABEL: gather_mask_dps_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
   ret <16 x float> %res;
 }
 
-;CHECK-LABEL: gather_mask_qps_execdomain
-;CHECK: vgatherqps
-;CHECK: vmovaps
-;CHECK: ret
 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base)  {
+; CHECK-LABEL: gather_mask_qps_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   ret <8 x float> %res;
 }
 
-;CHECK-LABEL: scatter_mask_dpd_execdomain
-;CHECK: vmovapd
-;CHECK: vscatterdpd
-;CHECK: ret
 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
-  %x = load <8 x double>, <8 x double>* %src, align 64 
+; CHECK-LABEL: scatter_mask_dpd_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovapd (%rdi), %zmm1
+; CHECK-NEXT:    vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
+  %x = load <8 x double>, <8 x double>* %src, align 64
   call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: scatter_mask_qpd_execdomain
-;CHECK: vmovapd
-;CHECK: vscatterqpd
-;CHECK: ret
 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: scatter_mask_qpd_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovapd (%rdi), %zmm1
+; CHECK-NEXT:    vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = load <8 x double>, <8 x double>* %src, align 64
   call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: scatter_mask_dps_execdomain
-;CHECK: vmovaps
-;CHECK: vscatterdps
-;CHECK: ret
 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: scatter_mask_dps_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps (%rdi), %zmm1
+; CHECK-NEXT:    vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = load <16 x float>, <16 x float>* %src, align 64
   call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: scatter_mask_qps_execdomain
-;CHECK: vmovaps
-;CHECK: vscatterqps
-;CHECK: ret
 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
-  %x = load <8 x float>, <8 x float>* %src, align 32 
+; CHECK-LABEL: scatter_mask_qps_execdomain:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps (%rdi), %ymm1
+; CHECK-NEXT:    vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  %x = load <8 x float>, <8 x float>* %src, align 32
   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_qps
-;CHECK: kxnorw
-;CHECK: vgatherqps
-;CHECK: vpadd
-;CHECK: vscatterqps
-;CHECK: ret
 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf)  {
+; CHECK-LABEL: gather_qps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
+; CHECK-NEXT:    retq
   %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: prefetch
-;CHECK: gatherpf0
-;CHECK: gatherpf1
-;CHECK: scatterpf0
-;CHECK: scatterpf1
-;CHECK: ret
 declare  void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
 declare  void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
 define void @prefetch(<8 x i64> %ind, i8* %base) {
+; CHECK-LABEL: prefetch:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT:    vscatterpf1qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT:    retq
   call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
   call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
   call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
   call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
   ret void
 }
+
+
+declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)
+
+define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
+  %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
+; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,0), %xmm0 {%k1}
+; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
+; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)
+
+define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,0), %ymm0 {%k1}
+; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
+; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 0)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)
+
+define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm2
+; CHECK-NEXT:    kmovw %k1, %k2
+; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
+; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 0)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)
+
+define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)
+
+define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)
+
+define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)
+
+define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    kxnorw %k2, %k2, %k2
+; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)
+
+define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)
+
+define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT:    kxnorw %k1, %k1, %k1
+; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+  ret void
+}
+
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index a06cada..b9f490b 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -489,19 +489,31 @@ declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>
  }
  declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
 
- define <16 x i32> @test_pabsd(<16 x i32> %a) {
- ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
- %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1)
- ret < 16 x i32> %res
- }
  declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
 
- define <8 x i64> @test_pabsq(<8 x i64> %a) {
- ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
- %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1)
- ret <8 x i64> %res
- }
- declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsd{{.*}}{%k1} 
+define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsq{{.*}}{%k1} 
+define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
 
 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
   ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
@@ -3013,3 +3025,146 @@ define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %
   %res2 = add <8 x i64> %res, %res1
   ret <8 x i64> %res2
 }
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2d {{.*}}{%k1} 
+define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK:  vpermi2pd {{.*}}{%k1} 
+define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2ps {{.*}}{%k1} 
+define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2q {{.*}}{%k1} 
+define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2d {{.*}}{%k1} {z}
+define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+  %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+  %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2pd {{.*}}{%k1} {z}
+define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
+  %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
+  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2ps {{.*}}{%k1} {z}
+define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
+  %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
+  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+
+declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2q {{.*}}{%k1} {z}
+define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2d {{.*}}{%k1}
+; CHECK-NOT: {z}
+define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefpd{{.*}}{%k1} 
+define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefps{{.*}}{%k1} 
+define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
+  %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll
deleted file mode 100644
index 7e9eda5..0000000
--- a/test/CodeGen/X86/avx512-shuffle.ll
+++ /dev/null
@@ -1,392 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK-SKX
-
-; CHECK-LABEL: test1:
-; CHECK: vpermps
-; CHECK: ret
-define <16 x float> @test1(<16 x float> %a) nounwind {
-  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
-  ret <16 x float> %c
-}
-
-; CHECK-LABEL: test2:
-; CHECK: vpermd
-; CHECK: ret
-define <16 x i32> @test2(<16 x i32> %a) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: test3:
-; CHECK: vpermq
-; CHECK: ret
-define <8 x i64> @test3(<8 x i64> %a) nounwind {
-  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
-  ret <8 x i64> %c
-}
-
-; CHECK-LABEL: test4:
-; CHECK: vpermpd
-; CHECK: ret
-define <8 x double> @test4(<8 x double> %a) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <8 x double> %c
-}
-
-; CHECK-LABEL: test5:
-; CHECK: vpermt2pd
-; CHECK: ret
-define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
-  ret <8 x double> %c
-}
-
-; CHECK-LABEL: test6:
-; CHECK: vpermq $30
-; CHECK: ret
-define <8 x i64> @test6(<8 x i64> %a) nounwind {
-  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
-  ret <8 x i64> %c
-}
-
-; CHECK-LABEL: test7:
-; CHECK: vpermt2q
-; CHECK: ret
-define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
-  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
-  ret <8 x i64> %c
-}
-
-; CHECK-LABEL: test8:
-; CHECK: vpermt2d
-; CHECK: ret
-define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: test9:
-; CHECK: vpermt2ps
-; CHECK: ret
-define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
-  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x float> %c
-}
-
-; CHECK-LABEL: test10:
-; CHECK: vpermt2ps (
-; CHECK: ret
-define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
-  %c = load <16 x float>, <16 x float>* %b
-  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x float> %d
-}
-
-; CHECK-LABEL: test11:
-; CHECK: vpermt2d 
-; CHECK: ret
-define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
-  %c = load <16 x i32>, <16 x i32>* %b
-  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x i32> %d
-}
-
-; CHECK-LABEL: test13
-; CHECK: vpermilps $177, %zmm
-; CHECK: ret
-define <16 x float> @test13(<16 x float> %a) {
- %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- ret <16 x float> %b
-}
-
-; CHECK-LABEL: test14
-; CHECK: vpermilpd $203, %zmm
-; CHECK: ret
-define <8 x double> @test14(<8 x double> %a) {
- %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
- ret <8 x double> %b
-}
-
-; CHECK-LABEL: test15
-; CHECK: vpshufd $177, %zmm
-; CHECK: ret
-define <16 x i32> @test15(<16 x i32> %a) {
-; mask 1-0-3-2 = 10110001 = 0xb1 = 177
- %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- ret <16 x i32> %b
-}
-; CHECK-LABEL: test16
-; CHECK: valignq $3, %zmm0, %zmm1
-; CHECK: ret
-define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
-  ret <8 x double> %c
-}
-
-; CHECK-LABEL: test17
-; CHECK: vshufpd $19, %zmm1, %zmm0
-; CHECK: ret
-define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
-  ret <8 x double> %c
-}
-
-; CHECK-LABEL: test18
-; CHECK: vpunpckhdq %zmm
-; CHECK: ret
-define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
- %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- ret <16 x i32> %b
-}
-
-; CHECK-LABEL: test19
-; CHECK: vpunpckldq %zmm
-; CHECK: ret
-define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
- %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- ret <16 x i32> %b
-}
-
-; CHECK-LABEL: test20
-; CHECK: vpunpckhqdq  %zmm
-; CHECK: ret
-define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
- %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- ret <8 x i64> %b
-}
-
-; CHECK-LABEL: test21
-; CHECK: vbroadcastsd  %xmm0, %zmm
-; CHECK: ret
-define <8 x double> @test21(<8 x double> %a, <8 x double> %b) {
-  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-  ret <8 x double> %shuffle
-}
-
-; CHECK-LABEL: test22
-; CHECK: vpbroadcastq  %xmm0, %zmm
-; CHECK: ret
-define <8 x i64> @test22(<8 x i64> %a, <8 x i64> %b) {
-  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-  ret <8 x i64> %shuffle
-}
-
-; CHECK-LABEL: @test23
-; CHECK: vshufps
-; CHECK: vshufps
-; CHECK: ret
-define <16 x i32> @test23(<16 x i32> %a, <16 x i32> %b) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: @test24
-; CHECK: vpermt2d
-; CHECK: ret
-define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 25, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: @test25
-; CHECK: vshufps  $52
-; CHECK: ret
-define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind {
-; mask - 0-1-3-0 00110100 = 0x34 = 52
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 16, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: @test26
-; CHECK: vmovshdup
-; CHECK: ret
-define <16 x i32> @test26(<16 x i32> %a) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: @test27
-; CHECK: ret
-define <16 x i32> @test27(<4 x i32>%a) {
- %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
- ret <16 x i32> %res
-}
-
-; CHECK-LABEL: test28
-; CHECK: vpshufhw $177, %ymm
-; CHECK: ret
-define <16 x i16> @test28(<16 x i16> %a) {
- %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32><i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
- ret <16 x i16> %b
-}
-
-; CHECK-LABEL: test29
-; CHECK: vunpcklps %zmm
-; CHECK: ret
-define <16 x float> @test29(<16 x float> %a, <16 x float> %c) {
- %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- ret <16 x float> %b
-}
-
-; CHECK-LABEL: @test30
-; CHECK: vshufps $144, %zmm
-; CHECK: ret
-define <16 x float> @test30(<16 x float> %a, <16 x float> %c) {
- %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30>
- ret <16 x float> %b
-}
-
-; CHECK-LABEL: test31
-; CHECK: valignd $3, %zmm0, %zmm1
-; CHECK: ret
-define <16 x i32> @test31(<16 x i32> %a, <16 x i32> %b) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: test32
-; CHECK: vshufpd $99, %zmm0, %zmm1
-; CHECK: ret
-define <8 x double> @test32(<8 x double> %a, <8 x double> %b) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 1, i32 10, i32 2, i32 undef, i32 5, i32 15, i32 undef>
-  ret <8 x double> %c
-}
-
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind {
-; CHECK-LABEL: test_vshuff64x2_512:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vshuff64x2 $136, %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1,  i32 4, i32 5>
-  ret <8 x double> %res
-}
-
-define <8 x double> @test_vshuff64x2_512_mask(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
-; CHECK-LABEL: test_vshuff64x2_512_mask:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxwq %xmm2, %zmm1
-; CHECK-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; CHECK-NEXT:    vshuff64x2 $136, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-  %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1,  i32 4, i32 5>
-  %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
-  ret <8 x double> %res
-}
-
-define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
-; CHECK-LABEL: test_vshufi64x2_512_mask:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxwq %xmm2, %zmm1
-; CHECK-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; CHECK-NEXT:    vshufi64x2 $168, %zmm0, %zmm0, %zmm0 {%k1}
-; CHECK-NEXT:    retq
-  %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5,  i32 4, i32 5>
-  %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
-  ret <8 x i64> %res
-}
-
-define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind {
-; CHECK-LABEL: test_vshuff64x2_512_mem:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vshuff64x2 $40, %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %x1   = load <8 x double>,<8 x double> *%ptr,align 1
-  %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5,  i32 0, i32 1>
-  ret <8 x double> %res
-}
-
-define <16 x float> @test_vshuff32x4_512_mem(<16 x float> %x, <16 x float> *%ptr) nounwind {
-; CHECK-LABEL: test_vshuff32x4_512_mem:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vshuff64x2 $20, %zmm0, %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %x1   = load <16 x float>,<16 x float> *%ptr,align 1
-  %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-  ret <16 x float> %res
-}
-
-define <16 x i32> @test_align_v16i32_rr(<16 x i32> %a, <16 x i32> %b) nounwind {
-; CHECK-LABEL: test_align_v16i32_rr:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    valignd $3, %zmm0, %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-  ret <16 x i32> %c
-}
-
-define <16 x i32> @test_align_v16i32_rm(<16 x i32>* %a.ptr, <16 x i32> %b) nounwind {
-; CHECK-LABEL: test_align_v16i32_rm:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    valignd $3, (%rdi), %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %a = load <16 x i32>, <16 x i32>* %a.ptr
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-  ret <16 x i32> %c
-}
-
-define <16 x i32> @test_align_v16i32_rm_mask(<16 x i32>* %a.ptr, <16 x i32> %b, <16 x i1> %mask) nounwind {
-; CHECK-LABEL: test_align_v16i32_rm_mask:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxbd %xmm1, %zmm1
-; CHECK-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
-; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
-; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm1
-; CHECK-NEXT:    valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-NEXT:    retq
-;
-; CHECK-SKX-LABEL: test_align_v16i32_rm_mask:
-; CHECK-SKX:       ## BB#0:
-; CHECK-SKX-NEXT:    vpmovb2m %xmm1, %k1
-; CHECK-SKX-NEXT:    vmovdqa32 (%rdi), %zmm1
-; CHECK-SKX-NEXT:    valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
-; CHECK-SKX-NEXT:    vmovaps %zmm1, %zmm0
-; CHECK-SKX-NEXT:    retq
-  %a = load <16 x i32>, <16 x i32>* %a.ptr
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
-  %res = select <16 x i1> %mask,<16 x i32> %c, <16 x i32> %a
-  ret <16 x i32> %res
-}
-
-define <8 x double> @test_align_v8f64_rr(<8 x double> %a, <8 x double> %b) nounwind {
-; CHECK-LABEL: test_align_v8f64_rr:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    valignq $3, %zmm0, %zmm1, %zmm0
-; CHECK-NEXT:    retq
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
-  ret <8 x double> %c
-}
-
-define <8 x double> @test_align_v18f64_rm(<8 x double>* %a.ptr, <8 x double> %b) nounwind {
-; CHECK-LABEL: test_align_v18f64_rm:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    valignq $3, (%rdi), %zmm0, %zmm0
-; CHECK-NEXT:    retq
-  %a = load <8 x double>, <8 x double>* %a.ptr
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
-  ret <8 x double> %c
-}
-
-define <8 x double> @test_align_v18f64_rm_mask(<8 x double>* %a.ptr, <8 x double> %b, <8 x i1> %mask) nounwind {
-; CHECK-LABEL: test_align_v18f64_rm_mask:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpmovsxwq %xmm1, %zmm1
-; CHECK-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; CHECK-NEXT:    valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT:    retq
-;
-; CHECK-SKX-LABEL: test_align_v18f64_rm_mask:
-; CHECK-SKX:       ## BB#0:
-; CHECK-SKX-NEXT:    vpmovw2m %xmm1, %k1
-; CHECK-SKX-NEXT:    valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
-; CHECK-SKX-NEXT:    retq
-  %a = load <8 x double>, <8 x double>* %a.ptr
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
-  %res = select <8 x i1> %mask,<8 x double> %c, <8 x double> zeroinitializer
-  ret <8 x double> %res
-}
-
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index 9ee0e09..9574c01 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -893,6 +893,45 @@ define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16
   ret <32 x i16> %res2
 }
 
+declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2w %zmm{{.*}}{%k1} 
+define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+  %res2 = add <32 x i16> %res, %res1
+  ret <32 x i16> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
+define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+  %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+  %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+  %res2 = add <32 x i16> %res, %res1
+  ret <32 x i16> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2w %zmm{{.*}}{%k1} 
+define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+  %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+  %res2 = add <32 x i16> %res, %res1
+  ret <32 x i16> %res2
+}
+
 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
 
 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
@@ -918,3 +957,43 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
   %res2 = add <32 x i16> %res, %res1
   ret <32 x i16> %res2
 }
+
+declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpshufb %zmm{{.*}}{%k1} 
+define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+  %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+  %res2 = add <64 x i8> %res, %res1
+  ret <64 x i8> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsw{{.*}}{%k1} 
+define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
+  %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
+  %res2 = add <32 x i16> %res, %res1
+  ret <32 x i16> %res2
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsb{{.*}}{%k1} 
+define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
+  %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
+  %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
+  %res2 = add <64 x i8> %res, %res1
+  ret <64 x i8> %res2
+}
+
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index cf8c32a..0119d39 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -612,248 +612,925 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
 
 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
 
-declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
 
 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd256_ps
   ; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
-  %res = call <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
   ret <8 x float> %res
 }
 
-declare <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
 
 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps
   ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
-declare <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
 
 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
 ; CHECK-LABEL: test_mask_fmadd256_pd:
 ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
   ret <4 x double> %res
 }
 
-declare <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
 
 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
 ; CHECK-LABEL: test_mask_fmadd128_pd:
 ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
   ret <2 x double> %res
 }
 
-declare <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
-
-define <8 x float> @test_mask_vfmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsub256_ps
-  ; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xaa,0xc2]
-  %res = call <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
-  ret <8 x float> %res
-}
-
-declare <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
-define <4 x float> @test_mask_vfmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsub128_ps
-  ; CHECK: vfmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaa,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
-  ret <4 x float> %res
-}
-
-declare <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
-
-define <4 x double> @test_mask_vfmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsub256_pd
-  ; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xaa,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
-  ret <4 x double> %res
-}
-
-declare <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
-
-define <2 x double> @test_mask_vfmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsub128_pd
-  ; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaa,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
-  ret <2 x double> %res
-}
-
-declare <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+define <4 x float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
 
 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmadd256_ps
   ; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
-  %res = call <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
   ret <8 x float> %res
 }
 
-declare <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
 
 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmadd128_ps
   ; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
-declare <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
 
 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmadd256_pd
   ; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   ret <4 x double> %res
 }
 
-declare <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
 
 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmadd128_pd
   ; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   ret <2 x double> %res
 }
 
-declare <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
 
 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmsub256_ps
   ; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
-  %res = call <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
   ret <8 x float> %res
 }
 
-declare <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
 
 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmsub128_ps
   ; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
-declare <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
 
 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmsub256_pd
   ; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   ret <4 x double> %res
 }
 
-declare <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
 
 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfnmsub128_pd
   ; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   ret <2 x double> %res
 }
 
-declare <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
 
 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
 ; CHECK-LABEL: test_mask_fmaddsub256_ps:
 ; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
-  %res = call <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
   ret <8 x float> %res
 }
 
-declare <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
 
 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
 ; CHECK-LABEL: test_mask_fmaddsub128_ps:
 ; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
   ret <4 x float> %res
 }
 
-declare <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
 
 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmaddsub256_pd
   ; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   ret <4 x double> %res
 }
 
-declare <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
 
 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmaddsub128_pd
   ; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   ret <2 x double> %res
 }
 
-declare <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
-
-define <8 x float> @test_mask_vfmsubadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubadd256_ps
-  ; CHECK: vfmsubadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa7,0xc2]
-  %res = call <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
-  ret <8 x float> %res
-}
-
-declare <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
-
-define <4 x float> @test_mask_vfmsubadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubadd128_ps
-  ; CHECK: vfmsubadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa7,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
-  ret <4 x float> %res
-}
-
-declare <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
-
-define <4 x double> @test_mask_vfmsubadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubadd256_pd
-  ; CHECK: vfmsubadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa7,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
-  ret <4 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
-
-define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubadd128_pd
-  ; CHECK: vfmsubadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
-  ret <2 x double> %res
+define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2=fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2=fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2=fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vmovaps %zmm2, %zmm3
+; CHECK-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1}
+; CHECK-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2=fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
 }
 
-define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubadd128rm_pd
-  ; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
-  %a2 = load <2 x double>, <2 x double>* %ptr_a2
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
-  ret <2 x double> %res
-}
-declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
-define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
-  ; CHECK-LABEL: test_mask_vfmsubaddrm_pd
-  ; CHECK: vfmsubadd213pd  (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
-  %a2 = load <8 x double>, <8 x double>* %ptr_a2, align 8
-  %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
-  ret <8 x double> %res
-}
 
 define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_r
   ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
 define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rz
   ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   ret <4 x float> %res
 }
 
@@ -861,7 +1538,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1,
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
   ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
   %a2 = load <4 x float>, <4 x float>* %ptr_a2
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
@@ -869,7 +1546,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
   ; CHECK: vfmadd213ps     (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
   %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
@@ -877,7 +1554,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
   ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
   %a2 = load <4 x float>, <4 x float>* %ptr_a2
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   ret <4 x float> %res
 }
 
@@ -885,7 +1562,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
   ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
   %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   ret <4 x float> %res
 }
 
@@ -897,7 +1574,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1,
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
@@ -909,7 +1586,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
   ret <4 x float> %res
 }
 
@@ -921,7 +1598,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
   ret <4 x float> %res
 }
 
@@ -933,21 +1610,21 @@ define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
-  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
+  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
   ret <4 x float> %res
 }
 
 define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_pd_r
   ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   ret <2 x double> %res
 }
 
 define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_pd_rz
   ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
   ret <2 x double> %res
 }
 
@@ -955,7 +1632,7 @@ define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %
   ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
   ; CHECK: vfmadd213pd	(%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
   %a2 = load <2 x double>, <2 x double>* %ptr_a2
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   ret <2 x double> %res
 }
 
@@ -963,21 +1640,21 @@ define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double>
   ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
   ; CHECK: vfmadd213pd	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
   %a2 = load <2 x double>, <2 x double>* %ptr_a2
-  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
+  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
   ret <2 x double> %res
 }
 
 define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd256_pd_r
   ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   ret <4 x double> %res
 }
 
 define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
   ; CHECK-LABEL: test_mask_vfmadd256_pd_rz
   ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
   ret <4 x double> %res
 }
 
@@ -985,7 +1662,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %
   ; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
   ; CHECK: vfmadd213pd	(%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
   %a2 = load <4 x double>, <4 x double>* %ptr_a2
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   ret <4 x double> %res
 }
 
@@ -993,7 +1670,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double>
   ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
   ; CHECK: vfmadd213pd	(%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
   %a2 = load <4 x double>, <4 x double>* %ptr_a2
-  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
+  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
   ret <4 x double> %res
 }
 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
@@ -2877,6 +3554,85 @@ define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16
   ret <16 x i16> %res2
 }
 
+declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2w %xmm{{.*}}{%k1} 
+; CHECK-NOT: {z}
+define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+  %res2 = add <8 x i16> %res, %res1
+  ret <8 x i16> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2w %xmm{{.*}}{%k1} {z}
+define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+  %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+  %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+  %res2 = add <8 x i16> %res, %res1
+  ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2w %ymm{{.*}}{%k1} 
+define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+  %res2 = add <16 x i16> %res, %res1
+  ret <16 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2w %ymm{{.*}}{%k1} {z}
+define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+  %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+  %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+  %res2 = add <16 x i16> %res, %res1
+  ret <16 x i16> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2w %xmm{{.*}}{%k1} 
+define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+  %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+  %res2 = add <8 x i16> %res, %res1
+  ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2w %ymm{{.*}}{%k1} 
+define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+  %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+  %res2 = add <16 x i16> %res, %res1
+  ret <16 x i16> %res2
+}
+
 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
 
 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_128
@@ -2928,3 +3684,82 @@ define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16>
   %res2 = add <16 x i16> %res, %res1
   ret <16 x i16> %res2
 }
+
+declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpshufb %xmm{{.*}}{%k1} 
+define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
+  %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
+  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
+  %res2 = add <16 x i8> %res, %res1
+  ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpshufb %ymm{{.*}}{%k1} 
+define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+  %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+  %res2 = add <32 x i8> %res, %res1
+  ret <32 x i8> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsb{{.*}}{%k1} 
+define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
+  %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
+  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
+  %res2 = add <16 x i8> %res, %res1
+  ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsb{{.*}}{%k1} 
+define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
+  %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
+  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
+  %res2 = add <32 x i8> %res, %res1
+  ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsw{{.*}}{%k1} 
+define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
+  %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
+  %res2 = add <8 x i16> %res, %res1
+  ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsw{{.*}}{%k1} 
+define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
+  %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
+  %res2 = add <16 x i16> %res, %res1
+  ret <16 x i16> %res2
+}
+
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll
index dfd4986..fb7c93d 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2794,4 +2794,213 @@ define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %
   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
   %res2 = add <4 x i64> %res, %res1
   ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2d %xmm{{.*}}{%k1}
+; CHECK-NOT: {z}
+define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2d %xmm{{.*}}{%k1} {z}
+define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2d %ymm{{.*}}{%k1}
+; CHECK-NOT: {z}
+define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermt2d {{.*}}{%k1} {z}
+define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+  %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2pd %xmm{{.*}}{%k1} 
+define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2pd %ymm{{.*}}{%k1} 
+define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2ps %xmm{{.*}}{%k1} 
+define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpermi2ps %ymm{{.*}}{%k1} 
+define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
+  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsq{{.*}}{%k1} 
+define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+  %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsq{{.*}}{%k1} 
+define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+  %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsd{{.*}}{%k1} 
+define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+  %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vpabsd{{.*}}{%k1} 
+define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+  %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+
+declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefpd{{.*}}{%k1} 
+define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefpd{{.*}}{%k1} 
+define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefps{{.*}}{%k1} 
+define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefps{{.*}}{%k1} 
+define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+  %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
 }
\ No newline at end of file
diff --git a/test/CodeGen/X86/coff-weak.ll b/test/CodeGen/X86/coff-weak.ll
new file mode 100644
index 0000000..3697501
--- /dev/null
+++ b/test/CodeGen/X86/coff-weak.ll
@@ -0,0 +1,9 @@
+; RUN: llc -function-sections -o - %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; CHECK: .section{{.*}}one_only
+define linkonce_odr void @foo() {
+  ret void
+}
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 656c385..5b01e2f 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -39,7 +39,7 @@ define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8
 entry:
 ; DARWIN-LABEL: t3:
 ; DARWIN: shlq $32, %rcx
-; DARWIN-NEXT: orq %rcx, %rax
+; DARWIN-NEXT: leaq (%rax,%rcx), %rax
 ; DARWIN-NEXT: shll $8
 ; DARWIN-NOT: leaq
   %tmp21 = zext i32 %lb to i64
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index 629a557..bb5e92f 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -71,33 +71,33 @@ define weak_odr dllexport void @weak1() {
 @blob_alias = dllexport alias bitcast ([6 x i8]* @blob to i32 ()*)
 
 ; CHECK: .section .drectve
-; WIN32: " /EXPORT:Var1,DATA"
-; WIN32: " /EXPORT:Var2,DATA"
-; WIN32: " /EXPORT:Var3,DATA"
-; WIN32: " /EXPORT:WeakVar1,DATA"
-; WIN32: " /EXPORT:WeakVar2,DATA"
-; WIN32: " /EXPORT:f1"
-; WIN32: " /EXPORT:f2"
-; WIN32: " /EXPORT:lnk1"
-; WIN32: " /EXPORT:lnk2"
-; WIN32: " /EXPORT:weak1"
-; WIN32: " /EXPORT:alias"
-; WIN32: " /EXPORT:alias2"
-; WIN32: " /EXPORT:alias3"
-; WIN32: " /EXPORT:weak_alias"
-; WIN32: " /EXPORT:blob_alias"
-; MINGW: " -export:Var1,data"
-; MINGW: " -export:Var2,data"
-; MINGW: " -export:Var3,data"
-; MINGW: " -export:WeakVar1,data"
-; MINGW: " -export:WeakVar2,data"
-; MINGW: " -export:f1"
-; MINGW: " -export:f2"
-; MINGW: " -export:lnk1"
-; MINGW: " -export:lnk2"
-; MINGW: " -export:weak1"
-; MINGW: " -export:alias"
-; MINGW: " -export:alias2"
-; MINGW: " -export:alias3"
-; MINGW: " -export:weak_alias"
-; MINGW: " -export:blob_alias"
+; WIN32: /EXPORT:f1
+; WIN32-SAME: /EXPORT:f2
+; WIN32-SAME: /EXPORT:lnk1
+; WIN32-SAME: /EXPORT:lnk2
+; WIN32-SAME: /EXPORT:weak1
+; WIN32-SAME: /EXPORT:Var1,DATA
+; WIN32-SAME: /EXPORT:Var2,DATA
+; WIN32-SAME: /EXPORT:Var3,DATA
+; WIN32-SAME: /EXPORT:WeakVar1,DATA
+; WIN32-SAME: /EXPORT:WeakVar2,DATA
+; WIN32-SAME: /EXPORT:alias
+; WIN32-SAME: /EXPORT:alias2
+; WIN32-SAME: /EXPORT:alias3
+; WIN32-SAME: /EXPORT:weak_alias
+; WIN32-SAME: /EXPORT:blob_alias
+; MINGW: -export:f1
+; MINGW-SAME: -export:f2
+; MINGW-SAME: -export:lnk1
+; MINGW-SAME: -export:lnk2
+; MINGW-SAME: -export:weak1
+; MINGW-SAME: -export:Var1,data
+; MINGW-SAME: -export:Var2,data
+; MINGW-SAME: -export:Var3,data
+; MINGW-SAME: -export:WeakVar1,data
+; MINGW-SAME: -export:WeakVar2,data
+; MINGW-SAME: -export:alias
+; MINGW-SAME: -export:alias2
+; MINGW-SAME: -export:alias3
+; MINGW-SAME: -export:weak_alias
+; MINGW-SAME: -export:blob_alias"
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index 02a83ae..915567d 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -89,40 +89,41 @@ define weak_odr dllexport void @weak1() {
 @weak_alias = weak_odr dllexport alias void()* @f1
 
 ; CHECK: .section .drectve
-; CHECK-CL: " /EXPORT:_Var1,DATA"
-; CHECK-CL: " /EXPORT:_Var2,DATA"
-; CHECK-CL: " /EXPORT:_Var3,DATA"
-; CHECK-CL: " /EXPORT:_WeakVar1,DATA"
-; CHECK-CL: " /EXPORT:_WeakVar2,DATA"
-; CHECK-CL: " /EXPORT:_f1"
-; CHECK-CL: " /EXPORT:_f2"
 ; CHECK-CL-NOT: not_exported
-; CHECK-CL: " /EXPORT:_stdfun@0"
-; CHECK-CL: " /EXPORT:@fastfun@0"
-; CHECK-CL: " /EXPORT:_thisfun"
-; CHECK-CL: " /EXPORT:_lnk1"
-; CHECK-CL: " /EXPORT:_lnk2"
-; CHECK-CL: " /EXPORT:_weak1"
-; CHECK-CL: " /EXPORT:_alias"
-; CHECK-CL: " /EXPORT:_alias2"
-; CHECK-CL: " /EXPORT:_alias3"
-; CHECK-CL: " /EXPORT:_weak_alias"
-; CHECK-GCC: " -export:Var1,data"
-; CHECK-GCC: " -export:Var2,data"
-; CHECK-GCC: " -export:Var3,data"
-; CHECK-GCC: " -export:WeakVar1,data"
-; CHECK-GCC: " -export:WeakVar2,data"
-; CHECK-GCC: " -export:f1"
-; CHECK-GCC: " -export:f2"
+; CHECK-CL: /EXPORT:_f1
+; CHECK-CL-SAME: /EXPORT:_f2
+; CHECK-CL-SAME: /EXPORT:_stdfun@0
+; CHECK-CL-SAME: /EXPORT:@fastfun@0
+; CHECK-CL-SAME: /EXPORT:_thisfun
+; CHECK-CL-SAME: /EXPORT:_lnk1
+; CHECK-CL-SAME: /EXPORT:_lnk2
+; CHECK-CL-SAME: /EXPORT:_weak1
+; CHECK-CL-SAME: /EXPORT:_Var1,DATA
+; CHECK-CL-SAME: /EXPORT:_Var2,DATA
+; CHECK-CL-SAME: /EXPORT:_Var3,DATA
+; CHECK-CL-SAME: /EXPORT:_WeakVar1,DATA
+; CHECK-CL-SAME: /EXPORT:_WeakVar2,DATA
+; CHECK-CL-SAME: /EXPORT:_alias
+; CHECK-CL-SAME: /EXPORT:_alias2
+; CHECK-CL-SAME: /EXPORT:_alias3
+; CHECK-CL-SAME: /EXPORT:_weak_alias"
 ; CHECK-CL-NOT: not_exported
-; CHECK-GCC: " -export:stdfun@0"
-; CHECK-GCC: " -export:@fastfun@0"
-; CHECK-GCC: " -export:thisfun"
-; CHECK-GCC: " -export:lnk1"
-; CHECK-GCC: " -export:lnk2"
-; CHECK-GCC: " -export:weak1"
-; CHECK-GCC: " -export:alias"
-; CHECK-GCC: " -export:alias2"
-; CHECK-GCC: " -export:alias3"
-; CHECK-GCC: " -export:weak_alias"
-
+; CHECK-GCC-NOT: not_exported
+; CHECK-GCC: -export:f1
+; CHECK-GCC-SAME: -export:f2
+; CHECK-GCC-SAME: -export:stdfun@0
+; CHECK-GCC-SAME: -export:@fastfun@0
+; CHECK-GCC-SAME: -export:thisfun
+; CHECK-GCC-SAME: -export:lnk1
+; CHECK-GCC-SAME: -export:lnk2
+; CHECK-GCC-SAME: -export:weak1
+; CHECK-GCC-SAME: -export:Var1,data
+; CHECK-GCC-SAME: -export:Var2,data
+; CHECK-GCC-SAME: -export:Var3,data
+; CHECK-GCC-SAME: -export:WeakVar1,data
+; CHECK-GCC-SAME: -export:WeakVar2,data
+; CHECK-GCC-SAME: -export:alias
+; CHECK-GCC-SAME: -export:alias2
+; CHECK-GCC-SAME: -export:alias3
+; CHECK-GCC-SAME: -export:weak_alias"
+; CHECK-GCC-NOT: not_exported
diff --git a/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll b/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
new file mode 100644
index 0000000..f7d0cdf
--- /dev/null
+++ b/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
@@ -0,0 +1,204 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s
+
+; CHECK-LABEL: fmaddsubpd_loop:
+; CHECK:   vfmaddsub231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubaddpd_loop:
+; CHECK:   vfmsubadd231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmaddpd_loop:
+; CHECK:   vfmadd231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubpd_loop:
+; CHECK:   vfmsub231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+
+; CHECK-LABEL: fmaddsubps_loop:
+; CHECK:   vfmaddsub231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubaddps_loop:
+; CHECK:   vfmsubadd231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmaddps_loop:
+; CHECK:   vfmadd231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubps_loop:
+; CHECK:   vfmsub231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
diff --git a/test/CodeGen/X86/fma-intrinsics-x86.ll b/test/CodeGen/X86/fma-intrinsics-x86.ll
new file mode 100644
index 0000000..8814363
--- /dev/null
+++ b/test/CodeGen/X86/fma-intrinsics-x86.ll
@@ -0,0 +1,493 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
+
+; VFMADD
+define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ss:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_sd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
+
+define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFMSUB
+define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ss:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_sd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
+
+define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFNMADD
+define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ss:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_sd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
+
+define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFNMSUB
+define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ss:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_sd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
+
+define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFMADDSUB
+define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFMSUBADD
+define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
+; CHECK-FMA:       # BB#0:
+; CHECK-FMA-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0
+; CHECK-FMA-NEXT:    retq
+;
+; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd_256:
+; CHECK-FMA4:       # BB#0:
+; CHECK-FMA4-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-FMA4-NEXT:    retq
+  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/fma-intrinsics-x86_64.ll b/test/CodeGen/X86/fma-intrinsics-x86_64.ll
deleted file mode 100644
index aadd731..0000000
--- a/test/CodeGen/X86/fma-intrinsics-x86_64.ll
+++ /dev/null
@@ -1,278 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA --check-prefix=CHECK
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK-FMA --check-prefix=CHECK
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK-FMA4 --check-prefix=CHECK
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK-FMA4 --check-prefix=CHECK
-
-; VFMADD
-define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfmaddss
-  ; CHECK-FMA: vfmadd213ss
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfmaddsd
-  ; CHECK-FMA: vfmadd213sd
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfmaddps
-  ; CHECK-FMA: vfmadd213ps
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfmaddpd
-  ; CHECK-FMA: vfmadd213pd
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK-FMA4: vfmaddps
-  ; CHECK-FMA: vfmadd213ps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK-FMA4: vfmaddpd
-  ; CHECK-FMA: vfmadd213pd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFMSUB
-define < 4 x float > @test_x86_fma_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfmsubss
-  ; CHECK-FMA: vfmsub213ss
-  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfmsubsd
-  ; CHECK-FMA: vfmsub213sd
-  %res = call < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfmsubps
-  ; CHECK-FMA: vfmsub213ps
-  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfmsubpd
-  ; CHECK-FMA: vfmsub213pd
-  %res = call < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK-FMA4: vfmsubps
-  ; CHECK-FMA: vfmsub213ps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK-FMA4: vfmsubpd
-  ; CHECK-FMA: vfmsub213pd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFNMADD
-define < 4 x float > @test_x86_fma_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfnmaddss
-  ; CHECK-FMA: vfnmadd213ss
-  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfnmaddsd
-  ; CHECK-FMA: vfnmadd213sd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfnmaddps
-  ; CHECK-FMA: vfnmadd213ps
-  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfnmaddpd
-  ; CHECK-FMA: vfnmadd213pd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK-FMA4: vfnmaddps
-  ; CHECK-FMA: vfnmadd213ps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK-FMA4: vfnmaddpd
-  ; CHECK-FMA: vfnmadd213pd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFNMSUB
-define < 4 x float > @test_x86_fma_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfnmsubss
-  ; CHECK-FMA: vfnmsub213ss
-  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfnmsubsd
-  ; CHECK-FMA: vfnmsub213sd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfnmsubps
-  ; CHECK-FMA: vfnmsub213ps
-  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfnmsubpd
-  ; CHECK-FMA: vfnmsub213pd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK-FMA4: vfnmsubps
-  ; CHECK-FMA: vfnmsub213ps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK-FMA4: vfnmsubpd
-  ; CHECK-FMA: vfnmsub213pd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFMADDSUB
-define < 4 x float > @test_x86_fma_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfmaddsubps
-  ; CHECK-FMA: vfmaddsub213ps
-  %res = call < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfmaddsubpd
-  ; CHECK-FMA: vfmaddsub213pd
-  %res = call < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK-FMA4: vfmaddsubps
-  ; CHECK-FMA: vfmaddsub213ps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK-FMA4: vfmaddsubpd
-  ; CHECK-FMA: vfmaddsub213pd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFMSUBADD
-define < 4 x float > @test_x86_fma_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK-FMA4: vfmsubaddps
-  ; CHECK-FMA: vfmsubadd213ps
-  %res = call < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK-FMA4: vfmsubaddpd
-  ; CHECK-FMA: vfmsubadd213pd
-  %res = call < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK-FMA4: vfmsubaddps
-  ; CHECK-FMA: vfmsubadd213ps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK-FMA4: vfmsubaddpd
-  ; CHECK-FMA: vfmsubadd213pd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fma-phi-213-to-231.ll b/test/CodeGen/X86/fma-phi-213-to-231.ll
index 9715bc7..34acdfe 100644
--- a/test/CodeGen/X86/fma-phi-213-to-231.ll
+++ b/test/CodeGen/X86/fma-phi-213-to-231.ll
@@ -1,246 +1,37 @@
-; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-; CHECK-LABEL: fmaddsubpd_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmaddsub231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <4 x double> %c.addr.0
-}
-
-; CHECK-LABEL: fmsubaddpd_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmsubadd231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <4 x double> %c.addr.0
-}
-
-; CHECK-LABEL: fmaddpd_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmadd231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <4 x double> %c.addr.0
-}
-
-; CHECK-LABEL: fmsubpd_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmsub231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <4 x double> %c.addr.0
-}
-
-declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
-declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
-declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
-declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
-
-
-; CHECK-LABEL: fmaddsubps_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmaddsub231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <8 x float> %c.addr.0
-}
-
-; CHECK-LABEL: fmsubaddps_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmsubadd231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <8 x float> %c.addr.0
-}
-
-; CHECK-LABEL: fmaddps_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmadd231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <8 x float> %c.addr.0
-}
-
-; CHECK-LABEL: fmsubps_loop
-; CHECK: [[BODYLBL:LBB.+]]:
-; CHECK:   vfmsub231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-; CHECK: [[INCLBL:LBB.+]]:
-; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
-; CHECK:   cmpl  {{%.+}}, [[INDREG]]
-; CHECK:   jl    [[BODYLBL]]
-define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
-entry:
-  br label %for.cond
-
-for.cond:
-  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, %iter
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond
-
-for.end:
-  ret <8 x float> %c.addr.0
-}
-
-declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
-declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
-declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
-declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+fma,-fma4  | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4  | FileCheck %s
+
+; Test FMA3 variant selection
+
+; CHECK-LABEL: fma3_select231ssX:
+; CHECK: vfmadd231ss %xmm
+define float @fma3_select231ssX(float %x, float %y) {
+entry:
+  br label %while.body
+while.body:
+  %acc.01 = phi float [ 0.000000e+00, %entry ], [ %acc, %while.body ]
+  %acc = call float @llvm.fma.f32(float %x, float %y, float %acc.01)
+  %b = fcmp ueq float %acc, 0.0
+  br i1 %b, label %while.body, label %while.end
+while.end:
+  ret float %acc
+}
+
+; CHECK-LABEL: fma3_select231pdY:
+; CHECK: vfmadd231pd %ymm
+define <4 x double> @fma3_select231pdY(<4 x double> %x, <4 x double> %y) {
+entry:
+  br label %while.body
+while.body:
+  %acc.04 = phi <4 x double> [ zeroinitializer, %entry ], [ %add, %while.body ]
+  %add = call <4 x double> @llvm.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %acc.04)
+  %vecext = extractelement <4 x double> %add, i32 0
+  %cmp = fcmp oeq double %vecext, 0.000000e+00
+  br i1 %cmp, label %while.body, label %while.end
+while.end:
+  ret <4 x double> %add
+}
+
+declare float @llvm.fma.f32(float, float, float)
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll
index 2eb152b..b91479c 100644
--- a/test/CodeGen/X86/fma.ll
+++ b/test/CodeGen/X86/fma.ll
@@ -1,80 +1,47 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+fma,-fma4  | FileCheck %s --check-prefix=CHECK-FMA-INST
-; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=-fma,-fma4  | FileCheck %s --check-prefix=CHECK-FMA-CALL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
-; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4  | FileCheck %s --check-prefix=CHECK-FMA-INST
-; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
-
-; CHECK: test_f32
+; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=+fma,-fma4  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=i386-apple-darwin10  -mattr=-fma,-fma4  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10  -mattr=+avx512f,-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4  | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL
+
+; CHECK-LABEL: test_f32:
 ; CHECK-FMA-INST: vfmadd213ss
 ; CHECK-FMA-CALL: fmaf
-
-define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
+define float @test_f32(float %a, float %b, float %c) #0 {
 entry:
-  %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
+  %call = call float @llvm.fma.f32(float %a, float %b, float %c)
   ret float %call
 }
 
-; CHECK: test_f64
+; CHECK-LABEL: test_f64:
 ; CHECK-FMA-INST: vfmadd213sd
 ; CHECK-FMA-CALL: fma
-
-define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
+define double @test_f64(double %a, double %b, double %c) #0 {
 entry:
-  %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+  %call = call double @llvm.fma.f64(double %a, double %b, double %c)
   ret double %call
 }
 
-; CHECK: test_f80
+; CHECK-LABEL: test_f80:
 ; CHECK: fmal
-
-define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp {
+define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) #0 {
 entry:
-  %call = tail call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone
+  %call = call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
   ret x86_fp80 %call
 }
 
-; CHECK: test_f32_cst
-; CHECK-NOT: fma
-define float @test_f32_cst() nounwind readnone ssp {
+; CHECK-LABEL: test_f32_cst:
+; CHECK-NOT: vfmadd
+define float @test_f32_cst() #0 {
 entry:
-  %call = tail call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0) nounwind readnone
+  %call = call float @llvm.fma.f32(float 3.0, float 3.0, float 3.0)
   ret float %call
 }
 
-; Test FMA3 variant selection
-; CHECK-FMA-INST: fma3_select231ssX:
-; CHECK-FMA-INST: vfmadd231ss %xmm
-define float @fma3_select231ssX(float %x, float %y) #0 {
-entry:
-  br label %while.body
-while.body:                                       ; preds = %while.body, %while.body
-  %acc.01 = phi float [ 0.000000e+00, %entry ], [ %acc, %while.body ]
-  %acc = tail call float @llvm.fma.f32(float %x, float %y, float %acc.01) nounwind readnone
-  %b = fcmp ueq float %acc, 0.0
-  br i1 %b, label %while.body, label %while.end
-while.end:                                        ; preds = %while.body, %entry
-  ret float %acc
-}
-
-; Test FMA3 variant selection
-; CHECK-FMA-INST: fma3_select231pdY:
-; CHECK-FMA-INST: vfmadd231pd %ymm
-define <4 x double> @fma3_select231pdY(<4 x double> %x, <4 x double> %y) #0 {
-entry:
-  br label %while.body
-while.body:                                       ; preds = %entry, %while.body
-  %acc.04 = phi <4 x double> [ zeroinitializer, %entry ], [ %add, %while.body ]
-  %add = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %acc.04)
-  %vecext = extractelement <4 x double> %add, i32 0
-  %cmp = fcmp oeq double %vecext, 0.000000e+00
-  br i1 %cmp, label %while.body, label %while.end
-
-while.end:                                        ; preds = %while.body
-  ret <4 x double> %add
-}
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
+declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80)
 
-declare float @llvm.fma.f32(float, float, float) nounwind readnone
-declare double @llvm.fma.f64(double, double, double) nounwind readnone
-declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) nounwind readnone
-declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll
deleted file mode 100755
index fa9c252..0000000
--- a/test/CodeGen/X86/fma3-intrinsics.ll
+++ /dev/null
@@ -1,150 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma,+fma4 | FileCheck %s
-; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
-
-define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fmadd213ss (%r8), [[XMM1]], [[XMM0]]
-  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fmadd213ps
-  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
-  ; CHECK: fmadd213ps {{.*\(%r.*}}, %ymm
-  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
-  ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
-
-define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fnmadd213ss (%r8), [[XMM1]], [[XMM0]]
-  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fnmadd213ps
-  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
-  ; CHECK: fnmadd213ps {{.*\(%r.*}}, %ymm
-  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind
-  ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
-
-
-define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fmsub213ss (%r8), [[XMM1]], [[XMM0]]
-  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fmsub213ps
-  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fnmsub213ss (%r8), [[XMM1]], [[XMM0]]
-  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
-  ; CHECK: fnmsub213ps
-  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-;;;;
-
-define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fmadd213sd (%r8), [[XMM1]], [[XMM0]]
-  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fmadd213pd
-  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fnmadd213sd (%r8), [[XMM1]], [[XMM0]]
-  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fnmadd213pd
-  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-
-
-define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fmsub213sd (%r8), [[XMM1]], [[XMM0]]
-  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fmsub213pd
-  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
-  ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
-  ; CHECK: fnmsub213sd (%r8), [[XMM1]], [[XMM0]]
-  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
-  ; CHECK: fnmsub213pd
-  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/fold-load-binops.ll b/test/CodeGen/X86/fold-load-binops.ll
new file mode 100644
index 0000000..6d501c7
--- /dev/null
+++ b/test/CodeGen/X86/fold-load-binops.ll
@@ -0,0 +1,142 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
+
+; Verify that we're folding the load into the math instruction.
+; This pattern is generated out of the simplest intrinsics usage:
+;  _mm_add_ss(a, _mm_load_ss(b));
+
+define <4 x float> @addss(<4 x float> %va, float* %pb) {
+; SSE-LABEL: addss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: addss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <4 x float> %va, i32 0
+    %b = load float, float* %pb
+    %r = fadd float %a, %b
+    %vr = insertelement <4 x float> %va, float %r, i32 0
+    ret <4 x float> %vr
+}
+
+define <2 x double> @addsd(<2 x double> %va, double* %pb) {
+; SSE-LABEL: addsd:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: addsd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <2 x double> %va, i32 0
+    %b = load double, double* %pb
+    %r = fadd double %a, %b
+    %vr = insertelement <2 x double> %va, double %r, i32 0
+    ret <2 x double> %vr
+}
+
+define <4 x float> @subss(<4 x float> %va, float* %pb) {
+; SSE-LABEL: subss:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: subss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <4 x float> %va, i32 0
+    %b = load float, float* %pb
+    %r = fsub float %a, %b
+    %vr = insertelement <4 x float> %va, float %r, i32 0
+    ret <4 x float> %vr
+}
+
+define <2 x double> @subsd(<2 x double> %va, double* %pb) {
+; SSE-LABEL: subsd:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: subsd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <2 x double> %va, i32 0
+    %b = load double, double* %pb
+    %r = fsub double %a, %b
+    %vr = insertelement <2 x double> %va, double %r, i32 0
+    ret <2 x double> %vr
+}
+
+define <4 x float> @mulss(<4 x float> %va, float* %pb) {
+; SSE-LABEL: mulss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: mulss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <4 x float> %va, i32 0
+    %b = load float, float* %pb
+    %r = fmul float %a, %b
+    %vr = insertelement <4 x float> %va, float %r, i32 0
+    ret <4 x float> %vr
+}
+
+define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
+; SSE-LABEL: mulsd:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulsd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: mulsd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <2 x double> %va, i32 0
+    %b = load double, double* %pb
+    %r = fmul double %a, %b
+    %vr = insertelement <2 x double> %va, double %r, i32 0
+    ret <2 x double> %vr
+}
+
+define <4 x float> @divss(<4 x float> %va, float* %pb) {
+; SSE-LABEL: divss:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: divss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <4 x float> %va, i32 0
+    %b = load float, float* %pb
+    %r = fdiv float %a, %b
+    %vr = insertelement <4 x float> %va, float %r, i32 0
+    ret <4 x float> %vr
+}
+
+define <2 x double> @divsd(<2 x double> %va, double* %pb) {
+; SSE-LABEL: divsd:
+; SSE:       # BB#0:
+; SSE-NEXT:    divsd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: divsd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+    %a = extractelement <2 x double> %va, i32 0
+    %b = load double, double* %pb
+    %r = fdiv double %a, %b
+    %vr = insertelement <2 x double> %va, double %r, i32 0
+    ret <2 x double> %vr
+}
diff --git a/test/CodeGen/X86/fold-vector-sext-crash2.ll b/test/CodeGen/X86/fold-vector-sext-crash2.ll
new file mode 100644
index 0000000..44c8361
--- /dev/null
+++ b/test/CodeGen/X86/fold-vector-sext-crash2.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -march=x86    | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+
+; DAGCombiner crashes during sext folding
+
+define <2 x i256> @test_sext1() {
+  %Se = sext <2 x i8> <i8 -100, i8 -99> to <2 x i256>
+  %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>
+  ret <2 x i256> %Shuff
+
+  ; X64-LABEL: test_sext1
+  ; X64:       movq $-1
+  ; X64-NEXT:  movq $-1
+  ; X64-NEXT:  movq $-1
+  ; X64-NEXT:  movq $-99
+
+  ; X32-LABEL: test_sext1
+  ; X32:       movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-99
+}
+
+define <2 x i256> @test_sext2() {
+  %Se = sext <2 x i128> <i128 -2000, i128 -1999> to <2 x i256>
+  %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>
+  ret <2 x i256> %Shuff
+
+  ; X64-LABEL: test_sext2
+  ; X64:       movq $-1
+  ; X64-NEXT:  movq $-1
+  ; X64-NEXT:  movq $-1
+  ; X64-NEXT:  movq $-1999
+
+  ; X32-LABEL: test_sext2
+  ; X32:       movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1999
+}
+
+define <2 x i256> @test_zext1() {
+  %Se = zext <2 x i8> <i8 -1, i8 -2> to <2 x i256>
+  %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>
+  ret <2 x i256> %Shuff
+
+  ; X64-LABEL: test_zext1
+  ; X64:       movq $0
+  ; X64-NEXT:  movq $0
+  ; X64-NEXT:  movq $0
+  ; X64-NEXT:  movq $254
+
+  ; X32-LABEL: test_zext1
+  ; X32:       movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $254
+}
+
+define <2 x i256> @test_zext2() {
+  %Se = zext <2 x i128> <i128 -1, i128 -2> to <2 x i256>
+  %Shuff = shufflevector <2 x i256> zeroinitializer, <2 x i256> %Se, <2 x i32> <i32 1, i32 3>
+  ret <2 x i256> %Shuff
+
+  ; X64-LABEL: test_zext2
+  ; X64:       movq $0
+  ; X64-NEXT:  movq $0
+  ; X64-NEXT:  movq $-1
+  ; X64-NEXT:  movq $-2
+
+  ; X32-LABEL: test_zext2
+  ; X32:       movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $0
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-1
+  ; X32-NEXT:  movl $-2
+}
diff --git a/test/CodeGen/X86/fold-vector-shl-crash.ll b/test/CodeGen/X86/fold-vector-shl-crash.ll
new file mode 100644
index 0000000..9f81e44
--- /dev/null
+++ b/test/CodeGen/X86/fold-vector-shl-crash.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86    | FileCheck %s
+
+;CHECK-LABEL: test
+define <2 x i256> @test() {
+  %S = shufflevector <2 x i256> zeroinitializer, <2 x i256> <i256 -1, i256 -1>, <2 x i32> <i32 0, i32 2>
+  %B = shl <2 x i256> %S, <i256 -1, i256 -1> ; DAG Combiner crashes here
+  ret <2 x i256> %B
+}
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index 4f503af..27af573 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -114,81 +114,3 @@ define float @test11(float %a) {
   ret float %t2
 }
 
-; Verify that the first two adds are independent regardless of how the inputs are 
-; commuted. The destination registers are used as source registers for the third add.
-
-define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds1:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
-  ret float %t2
-}
-
-define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds2:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %t1, %x3
-  ret float %t2
-}
-
-define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds3:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %x3, %t1
-  ret float %t2
-}
-
-define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds4:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %x2, %t0
-  %t2 = fadd float %x3, %t1
-  ret float %t2
-}
-
-; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
-; produced because that would cost more compile time.
-
-define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
-; CHECK-LABEL: reassociate_adds5:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm5, %xmm4, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm7, %xmm6, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
-  %t0 = fadd float %x0, %x1
-  %t1 = fadd float %t0, %x2
-  %t2 = fadd float %t1, %x3
-  %t3 = fadd float %t2, %x4
-  %t4 = fadd float %t3, %x5
-  %t5 = fadd float %t4, %x6
-  %t6 = fadd float %t5, %x7
-  ret float %t6
-}
diff --git a/test/CodeGen/X86/implicit-null-check-negative.ll b/test/CodeGen/X86/implicit-null-check-negative.ll
index e0210d9..8fbed9f 100644
--- a/test/CodeGen/X86/implicit-null-check-negative.ll
+++ b/test/CodeGen/X86/implicit-null-check-negative.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-macosx -O3 -debug-only=faultmaps -enable-implicit-null-checks < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx -O3 -debug-only=faultmaps -enable-implicit-null-checks < %s 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 ; List cases where we should *not* be emitting implicit null checks.
@@ -10,7 +10,7 @@ define i32 @imp_null_check_load(i32* %x, i32* %y) {
   %c = icmp eq i32* %x, null
 ; It isn't legal to move the load from %x from "not_null" to here --
 ; the store to %y could be aliasing it.
-  br i1 %c, label %is_null, label %not_null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !0
 
  is_null:
   ret i32 42
@@ -24,7 +24,7 @@ define i32 @imp_null_check_load(i32* %x, i32* %y) {
 define i32 @imp_null_check_gep_load(i32* %x) {
  entry:
   %c = icmp eq i32* %x, null
-  br i1 %c, label %is_null, label %not_null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !0
 
  is_null:
   ret i32 42
@@ -38,8 +38,7 @@ define i32 @imp_null_check_gep_load(i32* %x) {
 }
 
 define i32 @imp_null_check_load_no_md(i32* %x) {
-; Everything is okay except that the !never.executed metadata is
-; missing.
+; This is fine, except it is missing the !make.implicit metadata.
  entry:
   %c = icmp eq i32* %x, null
   br i1 %c, label %is_null, label %not_null
@@ -51,3 +50,5 @@ define i32 @imp_null_check_load_no_md(i32* %x) {
   %t = load i32, i32* %x
   ret i32 %t
 }
+
+!0 = !{}
diff --git a/test/CodeGen/X86/implicit-null-check.ll b/test/CodeGen/X86/implicit-null-check.ll
index f4c5398..1d1b36b 100644
--- a/test/CodeGen/X86/implicit-null-check.ll
+++ b/test/CodeGen/X86/implicit-null-check.ll
@@ -1,5 +1,15 @@
 ; RUN: llc -O3 -mtriple=x86_64-apple-macosx -enable-implicit-null-checks < %s | FileCheck %s
 
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-implicit-null-checks \
+; RUN:    | llvm-mc -triple x86_64-apple-macosx -filetype=obj -o - \
+; RUN:    | llvm-objdump -triple x86_64-apple-macosx -fault-map-section - \
+; RUN:    | FileCheck %s -check-prefix OBJDUMP
+
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -enable-implicit-null-checks \
+; RUN:    | llvm-mc -triple x86_64-unknown-linux-gnu -filetype=obj -o - \
+; RUN:    | llvm-objdump -triple x86_64-unknown-linux-gnu -fault-map-section - \
+; RUN:    | FileCheck %s -check-prefix OBJDUMP
+
 define i32 @imp_null_check_load(i32* %x) {
 ; CHECK-LABEL: _imp_null_check_load:
 ; CHECK: Ltmp1:
@@ -11,7 +21,7 @@ define i32 @imp_null_check_load(i32* %x) {
 
  entry:
   %c = icmp eq i32* %x, null
-  br i1 %c, label %is_null, label %not_null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !0
 
  is_null:
   ret i32 42
@@ -32,7 +42,7 @@ define i32 @imp_null_check_gep_load(i32* %x) {
 
  entry:
   %c = icmp eq i32* %x, null
-  br i1 %c, label %is_null, label %not_null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !0
 
  is_null:
   ret i32 42
@@ -55,7 +65,7 @@ define i32 @imp_null_check_add_result(i32* %x, i32 %p) {
 
  entry:
   %c = icmp eq i32* %x, null
-  br i1 %c, label %is_null, label %not_null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !0
 
  is_null:
   ret i32 42
@@ -66,6 +76,8 @@ define i32 @imp_null_check_add_result(i32* %x, i32 %p) {
   ret i32 %p1
 }
 
+!0 = !{}
+
 ; CHECK-LABEL: __LLVM_FaultMaps:
 
 ; Version:
@@ -116,3 +128,13 @@ define i32 @imp_null_check_add_result(i32* %x, i32 %p) {
 ; CHECK-NEXT: .long Ltmp1-_imp_null_check_load
 ; Fault[0].HandlerOffset:
 ; CHECK-NEXT: .long Ltmp0-_imp_null_check_load
+
+; OBJDUMP: FaultMap table:
+; OBJDUMP-NEXT: Version: 0x1
+; OBJDUMP-NEXT: NumFunctions: 3
+; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1
+; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 5
+; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1
+; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 7
+; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1
+; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 3
diff --git a/test/CodeGen/X86/machine-combiner.ll b/test/CodeGen/X86/machine-combiner.ll
new file mode 100644
index 0000000..d4cd59f
--- /dev/null
+++ b/test/CodeGen/X86/machine-combiner.ll
@@ -0,0 +1,99 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
+
+; Verify that the first two adds are independent regardless of how the inputs are
+; commuted. The destination registers are used as source registers for the third add.
+
+define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %t0 = fadd float %x0, %x1
+  %t1 = fadd float %t0, %x2
+  %t2 = fadd float %t1, %x3
+  ret float %t2
+}
+
+define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %t0 = fadd float %x0, %x1
+  %t1 = fadd float %x2, %t0
+  %t2 = fadd float %t1, %x3
+  ret float %t2
+}
+
+define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %t0 = fadd float %x0, %x1
+  %t1 = fadd float %t0, %x2
+  %t2 = fadd float %x3, %t1
+  ret float %t2
+}
+
+define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %t0 = fadd float %x0, %x1
+  %t1 = fadd float %x2, %t0
+  %t2 = fadd float %x3, %t1
+  ret float %t2
+}
+
+; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
+; produced because that would cost more compile time.
+
+define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm5, %xmm4, %xmm1
+; CHECK-NEXT:    vaddss %xmm6, %xmm1, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm7, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %t0 = fadd float %x0, %x1
+  %t1 = fadd float %t0, %x2
+  %t2 = fadd float %t1, %x3
+  %t3 = fadd float %t2, %x4
+  %t4 = fadd float %t3, %x5
+  %t5 = fadd float %t4, %x6
+  %t6 = fadd float %t5, %x7
+  ret float %t6
+}
+
+; Verify that we only need two associative operations to reassociate the operands.
+; Also, we should reassociate such that the result of the high latency division
+; is used by the final 'add' rather than reassociating the %x3 operand with the
+; division. The latter reassociation would not improve anything.
+ 
+define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds6:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %t0 = fdiv float %x0, %x1
+  %t1 = fadd float %x2, %t0
+  %t2 = fadd float %x3, %t1
+  ret float %t2
+}
+
diff --git a/test/CodeGen/X86/movtopush.ll b/test/CodeGen/X86/movtopush.ll
index f89e524..b02f9ec 100644
--- a/test/CodeGen/X86/movtopush.ll
+++ b/test/CodeGen/X86/movtopush.ll
@@ -2,11 +2,15 @@
 ; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
 ; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED 
 
+%class.Class = type { i32 }
+%struct.s = type { i64 }
+
 declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
 declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
+declare x86_thiscallcc void @thiscall(%class.Class* %class, i32 %a, i32 %b, i32 %c, i32 %d)
 declare void @oneparam(i32 %a)
 declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
-
+declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d)
 
 ; Here, we should have a reserved frame, so we don't expect pushes
 ; NORMAL-LABEL: test1:
@@ -108,13 +112,12 @@ entry:
   ret void
 }
 
-; We don't support weird calling conventions
+; We support weird calling conventions
 ; NORMAL-LABEL: test4:
-; NORMAL: subl    $12, %esp
-; NORMAL-NEXT: movl    $4, 8(%esp)
-; NORMAL-NEXT: movl    $3, 4(%esp)
-; NORMAL-NEXT: movl    $1, (%esp)
-; NORMAL-NEXT: movl    $2, %eax
+; NORMAL: movl    $2, %eax
+; NORMAL-NEXT: pushl   $4
+; NORMAL-NEXT: pushl   $3
+; NORMAL-NEXT: pushl   $1
 ; NORMAL-NEXT: call
 ; NORMAL-NEXT: addl $12, %esp
 define void @test4() optsize {
@@ -123,6 +126,20 @@ entry:
   ret void
 }
 
+; NORMAL-LABEL: test4b:
+; NORMAL: movl 4(%esp), %ecx
+; NORMAL-NEXT: pushl   $4
+; NORMAL-NEXT: pushl   $3
+; NORMAL-NEXT: pushl   $2
+; NORMAL-NEXT: pushl   $1
+; NORMAL-NEXT: call
+; NORMAL-NEXT: ret
+define void @test4b(%class.Class* %f) optsize {
+entry:
+  call x86_thiscallcc void @thiscall(%class.Class* %f, i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
 ; When there is no reserved call frame, check that additional alignment
 ; is added when the pushes don't add up to the required alignment.
 ; ALIGNED-LABEL: test5:
@@ -229,20 +246,27 @@ entry:
 ; NORMAL-NEXT: pushl $1
 ; NORMAL-NEXT: call
 ; NORMAL-NEXT: addl $16, %esp
-; NORMAL-NEXT: subl $16, %esp
-; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]]
-; NORMAL-NEXT: movl    [[EAX]], 12(%esp)
-; NORMAL-NEXT: movl    $7, 8(%esp)
-; NORMAL-NEXT: movl    $6, 4(%esp)
-; NORMAL-NEXT: movl    $5, (%esp)
+; NORMAL-NEXT: subl $20, %esp
+; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
+; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
+; NORMAL-NEXT: movl    [[E2]], 4(%esp)
+; NORMAL-NEXT: movl    [[E1]], (%esp)
+; NORMAL-NEXT: leal 32(%esp), [[E3:%e..]]
+; NORMAL-NEXT: movl    [[E3]], 16(%esp)
+; NORMAL-NEXT: leal 28(%esp), [[E4:%e..]]
+; NORMAL-NEXT: movl    [[E4]], 12(%esp)
+; NORMAL-NEXT: movl    $6, 8(%esp)
 ; NORMAL-NEXT: call
-; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: addl $20, %esp
 define void @test9() optsize {
 entry:
   %p = alloca i32, align 4
+  %q = alloca i32, align 4
+  %s = alloca %struct.s, align 4  
   call void @good(i32 1, i32 2, i32 3, i32 4)
-  %0 = ptrtoint i32* %p to i32
-  call void @good(i32 5, i32 6, i32 7, i32 %0)
+  %pv = ptrtoint i32* %p to i32
+  %qv = ptrtoint i32* %q to i32
+  call void @struct(%struct.s* byval %s, i32 6, i32 %qv, i32 %pv)
   ret void
 }
 
@@ -291,28 +315,17 @@ define void @test11() optsize {
 ; Converting one mov into a push isn't worth it when 
 ; doing so forces too much overhead for other calls.
 ; NORMAL-LABEL: test12:
-; NORMAL: subl    $16, %esp
-; NORMAL-NEXT: movl    $4, 8(%esp)
-; NORMAL-NEXT: movl    $3, 4(%esp)
-; NORMAL-NEXT: movl    $1, (%esp)
-; NORMAL-NEXT: movl    $2, %eax
-; NORMAL-NEXT: calll _inreg
-; NORMAL-NEXT: movl    $8, 12(%esp)
+; NORMAL: movl    $8, 12(%esp)
 ; NORMAL-NEXT: movl    $7, 8(%esp)
 ; NORMAL-NEXT: movl    $6, 4(%esp)
 ; NORMAL-NEXT: movl    $5, (%esp)
 ; NORMAL-NEXT: calll _good
-; NORMAL-NEXT: movl    $12, 8(%esp)
-; NORMAL-NEXT: movl    $11, 4(%esp)
-; NORMAL-NEXT: movl    $9, (%esp)
-; NORMAL-NEXT: movl    $10, %eax
-; NORMAL-NEXT: calll _inreg
-; NORMAL-NEXT: addl $16, %esp
 define void @test12() optsize {
 entry:
-  call void @inreg(i32 1, i32 2, i32 3, i32 4)
+  %s = alloca %struct.s, align 4  
+  call void @struct(%struct.s* %s, i32 2, i32 3, i32 4)
   call void @good(i32 5, i32 6, i32 7, i32 8)
-  call void @inreg(i32 9, i32 10, i32 11, i32 12)
+  call void @struct(%struct.s* %s, i32 10, i32 11, i32 12)
   ret void
 }
 
@@ -324,13 +337,12 @@ entry:
 ; NORMAL-NEXT: pushl    $1
 ; NORMAL-NEXT: calll _good
 ; NORMAL-NEXT: addl    $16, %esp
-; NORMAL-NEXT: subl    $12, %esp
-; NORMAL-NEXT: movl    $8, 8(%esp)
-; NORMAL-NEXT: movl    $7, 4(%esp)
-; NORMAL-NEXT: movl    $5, (%esp)
-; NORMAL-NEXT: movl    $6, %eax
-; NORMAL-NEXT: calll _inreg
-; NORMAL-NEXT: addl    $12, %esp
+; NORMAL-NEXT: subl    $20, %esp
+; NORMAL: movl    $8, 16(%esp)
+; NORMAL-NEXT: movl    $7, 12(%esp)
+; NORMAL-NEXT: movl    $6, 8(%esp)
+; NORMAL-NEXT: calll _struct
+; NORMAL-NEXT: addl    $20, %esp
 ; NORMAL-NEXT: pushl    $12
 ; NORMAL-NEXT: pushl    $11
 ; NORMAL-NEXT: pushl    $10
@@ -339,8 +351,9 @@ entry:
 ; NORMAL-NEXT: addl $16, %esp
 define void @test12b() optsize {
 entry:
-  call void @good(i32 1, i32 2, i32 3, i32 4)
-  call void @inreg(i32 5, i32 6, i32 7, i32 8)
+  %s = alloca %struct.s, align 4  
+  call void @good(i32 1, i32 2, i32 3, i32 4)  
+  call void @struct(%struct.s* %s, i32 6, i32 7, i32 8)
   call void @good(i32 9, i32 10, i32 11, i32 12)
   ret void
 }
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
index ae3ed3f..9db948a 100644
--- a/test/CodeGen/X86/or-branch.ll
+++ b/test/CodeGen/X86/or-branch.ll
@@ -1,19 +1,28 @@
-; RUN: llc < %s -march=x86  | not grep set
+; RUN: llc < %s -mtriple=i386-unknown-unknown -jump-is-expensive=0 | FileCheck %s --check-prefix=JUMP2
+; RUN: llc < %s -mtriple=i386-unknown-unknown -jump-is-expensive=1 | FileCheck %s --check-prefix=JUMP1
 
 define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
+; JUMP2-LABEL: foo:
+; JUMP2-DAG:     jl
+; JUMP2-DAG:     je
+;
+; JUMP1-LABEL: foo:
+; JUMP1-DAG:     sete
+; JUMP1-DAG:     setl
+; JUMP1:         orb
+; JUMP1:         jne
 entry:
-	%tmp = tail call i32 (...) @bar( )		; <i32> [#uses=0]
-	%tmp.upgrd.1 = icmp eq i32 %X, 0		; <i1> [#uses=1]
-	%tmp3 = icmp slt i32 %Y, 5		; <i1> [#uses=1]
-	%tmp4 = or i1 %tmp3, %tmp.upgrd.1		; <i1> [#uses=1]
-	br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
+  %tmp1 = icmp eq i32 %X, 0
+  %tmp3 = icmp slt i32 %Y, 5
+  %tmp4 = or i1 %tmp3, %tmp1
+  br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
 
-cond_true:		; preds = %entry
-	%tmp5 = tail call i32 (...) @bar( )		; <i32> [#uses=0]
-	ret void
+cond_true:
+  %tmp5 = tail call i32 (...) @bar( )
+  ret void
 
-UnifiedReturnBlock:		; preds = %entry
-	ret void
+UnifiedReturnBlock:
+  ret void
 }
 
 declare i32 @bar(...)
diff --git a/test/CodeGen/X86/pr23900.ll b/test/CodeGen/X86/pr23900.ll
new file mode 100644
index 0000000..cbc7716
--- /dev/null
+++ b/test/CodeGen/X86/pr23900.ll
@@ -0,0 +1,29 @@
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: llvm-nm %t.o | FileCheck %s
+
+; Test that it doesn't crash (and produces an object file).
+; This use to pass a symbol with a null name to code that expected a valid
+; C string.
+
+; CHECK:         U __CxxFrameHandler3
+; CHECK:         T f
+; CHECK:         t f.cleanup
+; CHECK:         U g
+; CHECK:         U h
+
+
+target triple = "x86_64-pc-windows-msvc18.0.0"
+define void @f(i32 %x) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+  invoke void @h()
+          to label %invoke.cont unwind label %lpad
+invoke.cont:
+  ret void
+lpad:
+ landingpad { i8*, i32 }
+          cleanup
+  call void @g(i32 %x)
+  ret void
+}
+declare void @h()
+declare i32 @__CxxFrameHandler3(...)
+declare void @g(i32 %x)
diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll
index 7f1521a..8e02dad 100644
--- a/test/CodeGen/X86/recip-fastmath.ll
+++ b/test/CodeGen/X86/recip-fastmath.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
 
@@ -14,11 +14,11 @@ define float @reciprocal_estimate(float %x) #0 {
   %div = fdiv fast float 1.0, %x
   ret float %div
 
-; CHECK-LABEL: reciprocal_estimate:
-; CHECK: movss
-; CHECK-NEXT: divss
-; CHECK-NEXT: movaps
-; CHECK-NEXT: retq
+; NORECIP-LABEL: reciprocal_estimate:
+; NORECIP: movss
+; NORECIP-NEXT: divss
+; NORECIP-NEXT: movaps
+; NORECIP-NEXT: retq
 
 ; RECIP-LABEL: reciprocal_estimate:
 ; RECIP: vrcpss
@@ -45,11 +45,11 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <4 x float> %div
 
-; CHECK-LABEL: reciprocal_estimate_v4f32:
-; CHECK: movaps
-; CHECK-NEXT: divps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: retq
+; NORECIP-LABEL: reciprocal_estimate_v4f32:
+; NORECIP: movaps
+; NORECIP-NEXT: divps
+; NORECIP-NEXT: movaps
+; NORECIP-NEXT: retq
 
 ; RECIP-LABEL: reciprocal_estimate_v4f32:
 ; RECIP: vrcpps
@@ -76,14 +76,14 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <8 x float> %div
 
-; CHECK-LABEL: reciprocal_estimate_v8f32:
-; CHECK: movaps
-; CHECK: movaps
-; CHECK-NEXT: divps
-; CHECK-NEXT: divps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: retq
+; NORECIP-LABEL: reciprocal_estimate_v8f32:
+; NORECIP: movaps
+; NORECIP: movaps
+; NORECIP-NEXT: divps
+; NORECIP-NEXT: divps
+; NORECIP-NEXT: movaps
+; NORECIP-NEXT: movaps
+; NORECIP-NEXT: retq
 
 ; RECIP-LABEL: reciprocal_estimate_v8f32:
 ; RECIP: vrcpps
diff --git a/test/CodeGen/X86/rrlist-livereg-corrutpion.ll b/test/CodeGen/X86/rrlist-livereg-corrutpion.ll
new file mode 100644
index 0000000..7191e04
--- /dev/null
+++ b/test/CodeGen/X86/rrlist-livereg-corrutpion.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK-LABEL: test
+define i64 @test(i64 %a, i256 %b, i1 %c) {
+  %u = zext i64 %a to i256
+  %s = add i256 %u, 1
+  %o = trunc i256 %s to i1
+  %j = add i256 %s, 1
+  %i = icmp ule i64 %a, 1
+  %f = select i1 %o, i256 undef, i256 %j
+  %d = select i1 %i, i256 %f, i256 1
+  %e = add i256 %b, 1
+  %n = select i1 %c, i256 %e, i256 %b
+  %m = trunc i256 %n to i64
+  %h = add i64 %m, 1
+  %r = zext i64 %h to i256
+  %v = lshr i256 %d, %r
+  %t = trunc i256 %v to i1
+  %q = shl i256 1, %r
+  %p = and i256 %d, %q
+  %w = icmp ule i256 %n, 1
+  %y = select i1 %t, i256 undef, i256 %p
+  %x = select i1 %w, i256 %y, i256 %d
+  %z = trunc i256 %x to i64
+  ret i64 %z
+}
diff --git a/test/CodeGen/X86/sdiv-exact.ll b/test/CodeGen/X86/sdiv-exact.ll
index 4f8d3f0..a6ace5b 100644
--- a/test/CodeGen/X86/sdiv-exact.ll
+++ b/test/CodeGen/X86/sdiv-exact.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 < %s | FileCheck %s
+; RUN: llc -march=x86 -mattr=+sse2 < %s | FileCheck %s
 
 define i32 @test1(i32 %x) {
   %div = sdiv exact i32 %x, 25
@@ -16,3 +16,14 @@ define i32 @test2(i32 %x) {
 ; CHECK-NEXT: imull	$-1431655765
 ; CHECK-NEXT: ret
 }
+
+define <4 x i32> @test3(<4 x i32> %x) {
+  %div = sdiv exact <4 x i32> %x, <i32 24, i32 24, i32 24, i32 24>
+  ret <4 x i32> %div
+; CHECK-LABEL: test3:
+; CHECK: psrad	$3,
+; CHECK: pmuludq
+; CHECK: pmuludq
+; CHECK-NOT: psrad
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/seh-catch-all-win32.ll b/test/CodeGen/X86/seh-catch-all-win32.ll
index 28b0bca..423b991 100644
--- a/test/CodeGen/X86/seh-catch-all-win32.ll
+++ b/test/CodeGen/X86/seh-catch-all-win32.ll
@@ -12,7 +12,7 @@ declare i32 @llvm.eh.typeid.for(i8*)
 declare i8* @llvm.frameaddress(i32)
 declare i8* @llvm.framerecover(i8*, i8*, i32)
 declare void @llvm.frameescape(...)
-declare i8* @llvm.x86.seh.exceptioninfo(i8*, i8*)
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
 
 define i32 @main() personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
 entry:
@@ -43,14 +43,16 @@ eh.resume:                                        ; preds = %lpad
 
 define internal i32 @"filt$main"() {
 entry:
-  %0 = tail call i8* @llvm.frameaddress(i32 1)
-  %1 = tail call i8* @llvm.framerecover(i8* bitcast (i32 ()* @main to i8*), i8* %0, i32 0)
-  %__exceptioncode = bitcast i8* %1 to i32*
-  %2 = tail call i8* @llvm.x86.seh.exceptioninfo(i8* bitcast (i32 ()* @main to i8*), i8* %0)
-  %3 = bitcast i8* %2 to i32**
-  %4 = load i32*, i32** %3, align 4
-  %5 = load i32, i32* %4, align 4
-  store i32 %5, i32* %__exceptioncode, align 4
+  %ebp = tail call i8* @llvm.frameaddress(i32 1)
+  %parentfp = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %ebp)
+  %code.i8 = tail call i8* @llvm.framerecover(i8* bitcast (i32 ()* @main to i8*), i8* %parentfp, i32 0)
+  %__exceptioncode = bitcast i8* %code.i8 to i32*
+  %info.addr = getelementptr inbounds i8, i8* %ebp, i32 -20
+  %0 = bitcast i8* %info.addr to i32***
+  %1 = load i32**, i32*** %0, align 4
+  %2 = load i32*, i32** %1, align 4
+  %3 = load i32, i32* %2, align 4
+  store i32 %3, i32* %__exceptioncode, align 4
   ret i32 1
 }
 
@@ -76,10 +78,17 @@ entry:
 ; CHECK: calll _printf
 
 ; CHECK: .section .xdata,"dr"
+; CHECK: Lmain$parent_frame_offset = Lmain$frame_escape_1
 ; CHECK: L__ehtable$main
 ; CHECK-NEXT: .long -1
 ; CHECK-NEXT: .long _filt$main
 ; CHECK-NEXT: .long Ltmp{{[0-9]+}}
 
 ; CHECK-LABEL: _filt$main:
-; CHECK: movl
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK: movl (%ebp), %[[oldebp:[a-z]+]]
+; CHECK: movl -20(%[[oldebp]]), %[[ehinfo:[a-z]+]]
+; CHECK: movl (%[[ehinfo]]), %[[ehrec:[a-z]+]]
+; CHECK: movl (%[[ehrec]]), %[[ehcode:[a-z]+]]
+; CHECK: movl %[[ehcode]], {{.*}}(%{{.*}})
diff --git a/test/CodeGen/X86/seh-filter-no-personality.ll b/test/CodeGen/X86/seh-filter-no-personality.ll
new file mode 100644
index 0000000..87bc9c9
--- /dev/null
+++ b/test/CodeGen/X86/seh-filter-no-personality.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s
+
+; Mostly make sure that llvm.x86.seh.recoverfp doesn't crash if the parent
+; function lacks a personality.
+
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
+
+define i32 @main() {
+entry:
+  ret i32 0
+}
+
+define internal i32 @"filt$main"() {
+entry:
+  %ebp = tail call i8* @llvm.frameaddress(i32 1)
+  %parentfp = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %ebp)
+  %info.addr = getelementptr inbounds i8, i8* %ebp, i32 -20
+  %0 = bitcast i8* %info.addr to i32***
+  %1 = load i32**, i32*** %0, align 4
+  %2 = load i32*, i32** %1, align 4
+  %3 = load i32, i32* %2, align 4
+  %matches = icmp eq i32 %3, u0xC0000005
+  %r = zext i1 %matches to i32
+  ret i32 %r
+}
+
+; CHECK: _main:
+; CHECK: xorl %eax, %eax
+; CHECK: retl
+
+; CHECK: _filt$main:
+; CHECK: retl
diff --git a/test/CodeGen/X86/seh-safe-div-win32.ll b/test/CodeGen/X86/seh-safe-div-win32.ll
index 0f76ec0..b1bcde2 100644
--- a/test/CodeGen/X86/seh-safe-div-win32.ll
+++ b/test/CodeGen/X86/seh-safe-div-win32.ll
@@ -122,27 +122,30 @@ entry:
 ;     ...
 ;   } EXCEPTION_RECORD;
 
-; FIXME: Use llvm.eh.exceptioninfo for this.
-declare i32 @safe_div_filt0()
-declare i32 @safe_div_filt1()
-; define i32 @safe_div_filt0() {
-;   %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-;   %eh_rec = load i32*, i32** %eh_ptrs_c
-;   %eh_code = load i32, i32* %eh_rec
-;   ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
-;   %cmp = icmp eq i32 %eh_code, 3221225477
-;   %filt.res = zext i1 %cmp to i32
-;   ret i32 %filt.res
-; }
-; define i32 @safe_div_filt1() {
-;   %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-;   %eh_rec = load i32*, i32** %eh_ptrs_c
-;   %eh_code = load i32, i32* %eh_rec
-;   ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
-;   %cmp = icmp eq i32 %eh_code, 3221225620
-;   %filt.res = zext i1 %cmp to i32
-;   ret i32 %filt.res
-; }
+define i32 @safe_div_filt0() {
+  %ebp = call i8* @llvm.frameaddress(i32 1)
+  %eh_ptrs.addr.i8 = getelementptr inbounds i8, i8* %ebp, i32 -20
+  %eh_ptrs.addr = bitcast i8* %eh_ptrs.addr.i8 to i32***
+  %eh_ptrs = load i32**, i32*** %eh_ptrs.addr
+  %eh_rec = load i32*, i32** %eh_ptrs
+  %eh_code = load i32, i32* %eh_rec
+  ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
+  %cmp = icmp eq i32 %eh_code, 3221225477
+  %filt.res = zext i1 %cmp to i32
+  ret i32 %filt.res
+}
+define i32 @safe_div_filt1() {
+  %ebp = call i8* @llvm.frameaddress(i32 1)
+  %eh_ptrs.addr.i8 = getelementptr inbounds i8, i8* %ebp, i32 -20
+  %eh_ptrs.addr = bitcast i8* %eh_ptrs.addr.i8 to i32***
+  %eh_ptrs = load i32**, i32*** %eh_ptrs.addr
+  %eh_rec = load i32*, i32** %eh_ptrs
+  %eh_code = load i32, i32* %eh_rec
+  ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
+  %cmp = icmp eq i32 %eh_code, 3221225620
+  %filt.res = zext i1 %cmp to i32
+  ret i32 %filt.res
+}
 
 @str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00"
 
@@ -170,3 +173,4 @@ declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
 declare void @puts(i8*)
 declare void @printf(i8*, ...)
 declare void @abort()
+declare i8* @llvm.frameaddress(i32)
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index ec62bcd..4330104 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -17,3 +17,62 @@ entry:
   ret i32 %tmp5
 }
 
+define i32* @test_exact1(i32 %a, i32 %b, i32* %x)  {
+; CHECK-LABEL: test_exact1:
+; CHECK: sarl %
+
+  %sub = sub i32 %b, %a
+  %shr = ashr exact i32 %sub, 3
+  %gep = getelementptr inbounds i32, i32* %x, i32 %shr
+  ret i32* %gep
+}
+
+define i32* @test_exact2(i32 %a, i32 %b, i32* %x)  {
+; CHECK-LABEL: test_exact2:
+; CHECK: sarl %
+
+  %sub = sub i32 %b, %a
+  %shr = ashr exact i32 %sub, 3
+  %gep = getelementptr inbounds i32, i32* %x, i32 %shr
+  ret i32* %gep
+}
+
+define i32* @test_exact3(i32 %a, i32 %b, i32* %x)  {
+; CHECK-LABEL: test_exact3:
+; CHECK-NOT: sarl
+
+  %sub = sub i32 %b, %a
+  %shr = ashr exact i32 %sub, 2
+  %gep = getelementptr inbounds i32, i32* %x, i32 %shr
+  ret i32* %gep
+}
+
+define i32* @test_exact4(i32 %a, i32 %b, i32* %x)  {
+; CHECK-LABEL: test_exact4:
+; CHECK: shrl %
+
+  %sub = sub i32 %b, %a
+  %shr = lshr exact i32 %sub, 3
+  %gep = getelementptr inbounds i32, i32* %x, i32 %shr
+  ret i32* %gep
+}
+
+define i32* @test_exact5(i32 %a, i32 %b, i32* %x)  {
+; CHECK-LABEL: test_exact5:
+; CHECK: shrl %
+
+  %sub = sub i32 %b, %a
+  %shr = lshr exact i32 %sub, 3
+  %gep = getelementptr inbounds i32, i32* %x, i32 %shr
+  ret i32* %gep
+}
+
+define i32* @test_exact6(i32 %a, i32 %b, i32* %x)  {
+; CHECK-LABEL: test_exact6:
+; CHECK-NOT: shrl
+
+  %sub = sub i32 %b, %a
+  %shr = lshr exact i32 %sub, 2
+  %gep = getelementptr inbounds i32, i32* %x, i32 %shr
+  ret i32* %gep
+}
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
index 373fa53..0f8d9f4 100644
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!sqrtf,!vec-sqrtf,!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
 
 declare double @__sqrt_finite(double) #0
@@ -10,10 +10,10 @@ declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
 
 
 define double @fd(double %d) #0 {
-; CHECK-LABEL: fd:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    sqrtsd %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; NORECIP-LABEL: fd:
+; NORECIP:       # BB#0:
+; NORECIP-NEXT:    sqrtsd %xmm0, %xmm0
+; NORECIP-NEXT:    retq
 ;
 ; ESTIMATE-LABEL: fd:
 ; ESTIMATE:       # BB#0:
@@ -25,10 +25,10 @@ define double @fd(double %d) #0 {
 
 
 define float @ff(float %f) #0 {
-; CHECK-LABEL: ff:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    sqrtss %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; NORECIP-LABEL: ff:
+; NORECIP:       # BB#0:
+; NORECIP-NEXT:    sqrtss %xmm0, %xmm0
+; NORECIP-NEXT:    retq
 ;
 ; ESTIMATE-LABEL: ff:
 ; ESTIMATE:       # BB#0:
@@ -49,11 +49,11 @@ define float @ff(float %f) #0 {
 
 
 define x86_fp80 @fld(x86_fp80 %ld) #0 {
-; CHECK-LABEL: fld:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    fsqrt
-; CHECK-NEXT:    retq
+; NORECIP-LABEL: fld:
+; NORECIP:       # BB#0:
+; NORECIP-NEXT:    fldt {{[0-9]+}}(%rsp)
+; NORECIP-NEXT:    fsqrt
+; NORECIP-NEXT:    retq
 ;
 ; ESTIMATE-LABEL: fld:
 ; ESTIMATE:       # BB#0:
@@ -67,12 +67,12 @@ define x86_fp80 @fld(x86_fp80 %ld) #0 {
 
 
 define float @reciprocal_square_root(float %x) #0 {
-; CHECK-LABEL: reciprocal_square_root:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    sqrtss %xmm0, %xmm1
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    divss %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; NORECIP-LABEL: reciprocal_square_root:
+; NORECIP:       # BB#0:
+; NORECIP-NEXT:    sqrtss %xmm0, %xmm1
+; NORECIP-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NORECIP-NEXT:    divss %xmm1, %xmm0
+; NORECIP-NEXT:    retq
 ;
 ; ESTIMATE-LABEL: reciprocal_square_root:
 ; ESTIMATE:       # BB#0:
@@ -89,12 +89,12 @@ define float @reciprocal_square_root(float %x) #0 {
 }
 
 define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
-; CHECK-LABEL: reciprocal_square_root_v4f32:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    sqrtps %xmm0, %xmm1
-; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; CHECK-NEXT:    divps %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; NORECIP-LABEL: reciprocal_square_root_v4f32:
+; NORECIP:       # BB#0:
+; NORECIP-NEXT:    sqrtps %xmm0, %xmm1
+; NORECIP-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; NORECIP-NEXT:    divps %xmm1, %xmm0
+; NORECIP-NEXT:    retq
 ;
 ; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
 ; ESTIMATE:       # BB#0:
@@ -111,15 +111,15 @@ define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
 }
 
 define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
-; CHECK-LABEL: reciprocal_square_root_v8f32:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    sqrtps %xmm1, %xmm2
-; CHECK-NEXT:    sqrtps %xmm0, %xmm3
-; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
-; CHECK-NEXT:    divps %xmm3, %xmm0
-; CHECK-NEXT:    divps %xmm2, %xmm1
-; CHECK-NEXT:    retq
+; NORECIP-LABEL: reciprocal_square_root_v8f32:
+; NORECIP:       # BB#0:
+; NORECIP-NEXT:    sqrtps %xmm1, %xmm2
+; NORECIP-NEXT:    sqrtps %xmm0, %xmm3
+; NORECIP-NEXT:    movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; NORECIP-NEXT:    movaps %xmm1, %xmm0
+; NORECIP-NEXT:    divps %xmm3, %xmm0
+; NORECIP-NEXT:    divps %xmm2, %xmm1
+; NORECIP-NEXT:    retq
 ;
 ; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
 ; ESTIMATE:       # BB#0:
diff --git a/test/CodeGen/X86/stack-folding-fp-sse42.ll b/test/CodeGen/X86/stack-folding-fp-sse42.ll
index 95f0c3d..63acf5f 100644
--- a/test/CodeGen/X86/stack-folding-fp-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-fp-sse42.ll
@@ -314,7 +314,13 @@ define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
 }
 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
 
-; TODO stack_fold_cvtsd2ss
+define float @stack_fold_cvtsd2ss(double %a0) optsize {
+  ;CHECK-LABEL: stack_fold_cvtsd2ss
+  ;CHECK:       cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = fptrunc double %a0 to float
+  ret float %2
+}
 
 define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
   ;CHECK-LABEL: stack_fold_cvtsd2ss_int
diff --git a/test/CodeGen/X86/stack-folding-int-avx2.ll b/test/CodeGen/X86/stack-folding-int-avx2.ll
index e930d24..a164fbb 100644
--- a/test/CodeGen/X86/stack-folding-int-avx2.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx2.ll
@@ -867,9 +867,21 @@ define <8 x i32> @stack_fold_pshufd(<8 x i32> %a0) {
   ret <8 x i32> %2
 }
 
-; TODO stack_fold_pshufhw
+define <16 x i16> @stack_fold_vpshufhw(<16 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpshufhw
+  ;CHECK:       vpshufhw $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
+  ret <16 x i16> %2
+}
 
-; TODO stack_fold_pshuflw
+define <16 x i16> @stack_fold_vpshuflw(<16 x i16> %a0) {
+  ;CHECK-LABEL: stack_fold_vpshuflw
+  ;CHECK:       vpshuflw $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %2
+}
 
 define <32 x i8> @stack_fold_psignb(<32 x i8> %a0, <32 x i8> %a1) {
   ;CHECK-LABEL: stack_fold_psignb
diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll
index 6bb0d89..e18476c 100644
--- a/test/CodeGen/X86/statepoint-stackmap-format.ll
+++ b/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mtriple="x86_64-pc-linux-gnu" | FileCheck %s
+; RUN: llc < %s -mtriple="x86_64-pc-win64-coff" | FileCheck %s
+
 ; This test is a sanity check to ensure statepoints are generating StackMap
 ; sections correctly.  This is not intended to be a rigorous test of the 
 ; StackMap format (see the stackmap tests for that).
 
 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
 
 declare zeroext i1 @return_i1()
 
diff --git a/test/CodeGen/X86/system-intrinsics-64.ll b/test/CodeGen/X86/system-intrinsics-64.ll
new file mode 100644
index 0000000..96c4417
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-64.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define void @test_fxsave(i8* %ptr) {
+; CHECK-LABEL: test_fxsave
+; CHECK: fxsave
+  call void @llvm.x86.fxsave(i8* %ptr)
+  ret void;
+}
+declare void @llvm.x86.fxsave(i8*)
+
+define void @test_fxsave64(i8* %ptr) {
+; CHECK-LABEL: test_fxsave64
+; CHECK: fxsave64
+  call void @llvm.x86.fxsave64(i8* %ptr)
+  ret void;
+}
+declare void @llvm.x86.fxsave64(i8*)
+
+define void @test_fxrstor(i8* %ptr) {
+; CHECK-LABEL: test_fxrstor
+; CHECK: fxrstor
+  call void @llvm.x86.fxrstor(i8* %ptr)
+  ret void;
+}
+declare void @llvm.x86.fxrstor(i8*)
+
+define void @test_fxrstor64(i8* %ptr) {
+; CHECK-LABEL: test_fxrstor64
+; CHECK: fxrstor64
+  call void @llvm.x86.fxrstor64(i8* %ptr)
+  ret void;
+}
+declare void @llvm.x86.fxrstor64(i8*)
diff --git a/test/CodeGen/X86/system-intrinsics.ll b/test/CodeGen/X86/system-intrinsics.ll
new file mode 100644
index 0000000..84fcd05
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown   | FileCheck %s
+
+define void @test_fxsave(i8* %ptr) {
+; CHECK-LABEL: test_fxsave
+; CHECK: fxsave
+  call void @llvm.x86.fxsave(i8* %ptr)
+  ret void;
+}
+declare void @llvm.x86.fxsave(i8*)
+
+define void @test_fxrstor(i8* %ptr) {
+; CHECK-LABEL: test_fxrstor
+; CHECK: fxrstor
+  call void @llvm.x86.fxrstor(i8* %ptr)
+  ret void;
+}
+declare void @llvm.x86.fxrstor(i8*)
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index b5ca027..5779cf3 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -25,8 +25,7 @@ define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind {
 entry:
 ; CHECK-LABEL: test2:
 ; CHECK: leal
-; CHECK-NOT: leal
-; CHECK-NOT: mov
+; CHECK-NEXT: addl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: ret
  %add = add i32 %b, %a
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index 8dded07..ca8be65 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -50,31 +50,15 @@ define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
 define <2 x double> @sitofp_2vf64_i16(<8 x i16> %a) {
 ; SSE2-LABEL: sitofp_2vf64_i16:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT:    movd %xmm1, %rax
-; SSE2-NEXT:    movswq %ax, %rax
-; SSE2-NEXT:    movd %xmm0, %rcx
-; SSE2-NEXT:    movswq %cx, %rcx
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2sdq %rcx, %xmm0
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_2vf64_i16:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    movswq %ax, %rax
-; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
-; AVX-NEXT:    movswq %cx, %rcx
-; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vcvtsi2sdq %rcx, %xmm0, %xmm0
-; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm1
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
   %cvt = sitofp <2 x i16> %shuf to <2 x double>
@@ -86,30 +70,14 @@ define <2 x double> @sitofp_2vf64_i8(<16 x i8> %a) {
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT:    movd %xmm1, %rax
-; SSE2-NEXT:    movsbq %al, %rax
-; SSE2-NEXT:    movd %xmm0, %rcx
-; SSE2-NEXT:    movsbq %cl, %rcx
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2sdq %rcx, %xmm0
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    psrad $24, %xmm0
+; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_2vf64_i8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    movsbq %al, %rax
-; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
-; AVX-NEXT:    movsbq %cl, %rcx
-; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vcvtsi2sdq %rcx, %xmm0, %xmm0
-; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm1
-; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
   %cvt = sitofp <2 x i8> %shuf to <2 x double>
diff --git a/test/CodeGen/X86/vec_shift8.ll b/test/CodeGen/X86/vec_shift8.ll
deleted file mode 100644
index 9d19f66..0000000
--- a/test/CodeGen/X86/vec_shift8.ll
+++ /dev/null
@@ -1,527 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-
-;
-; Vectorized integer shifts
-;
-
-define <2 x i64> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp {
-entry:
-; ALL-NOT: shll
-;
-; SSE2:       psllw   $12, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psllw   $8, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psllw   $4, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psllw   $2, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  psraw   $15, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  pandn   %xmm0, %xmm2
-; SSE2-NEXT:  psllw   $1, %xmm0
-; SSE2-NEXT:  pand    %xmm1, %xmm0
-; SSE2-NEXT:  por     %xmm2, %xmm0
-; SSE2-NEXT:  retq
-;
-; SSE41:      movdqa   %xmm0, %xmm2
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: psllw    $12, %xmm0
-; SSE41-NEXT: psllw    $4, %xmm1
-; SSE41-NEXT: por      %xmm0, %xmm1
-; SSE41-NEXT: movdqa   %xmm1, %xmm3
-; SSE41-NEXT: paddw    %xmm3, %xmm3
-; SSE41-NEXT: movdqa   %xmm2, %xmm4
-; SSE41-NEXT: psllw    $8, %xmm4
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: pblendvb %xmm4, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm1
-; SSE41-NEXT: psllw    $4, %xmm1
-; SSE41-NEXT: movdqa   %xmm3, %xmm0
-; SSE41-NEXT: pblendvb %xmm1, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm1
-; SSE41-NEXT: psllw    $2, %xmm1
-; SSE41-NEXT: paddw    %xmm3, %xmm3
-; SSE41-NEXT: movdqa   %xmm3, %xmm0
-; SSE41-NEXT: pblendvb %xmm1, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm1
-; SSE41-NEXT: psllw    $1, %xmm1
-; SSE41-NEXT: paddw    %xmm3, %xmm3
-; SSE41-NEXT: movdqa   %xmm3, %xmm0
-; SSE41-NEXT: pblendvb %xmm1, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX:        vpsllw    $12, %xmm1, %xmm2
-; AVX-NEXT:   vpsllw    $4, %xmm1, %xmm1
-; AVX-NEXT:   vpor      %xmm2, %xmm1, %xmm1
-; AVX-NEXT:   vpaddw    %xmm1, %xmm1, %xmm2
-; AVX-NEXT:   vpsllw    $8, %xmm0, %xmm3
-; AVX-NEXT:   vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX-NEXT:   vpsllw    $4, %xmm0, %xmm1
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   vpsllw    $2, %xmm0, %xmm1
-; AVX-NEXT:   vpaddw    %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   vpsllw    $1, %xmm0, %xmm1
-; AVX-NEXT:   vpaddw    %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   retq
-  %shl = shl <8 x i16> %r, %a
-  %tmp2 = bitcast <8 x i16> %shl to <2 x i64>
-  ret <2 x i64> %tmp2
-}
-
-define <2 x i64> @shl_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
-entry:
-; SSE2:       psllw   $5, %xmm1
-; SSE2-NEXT:  pxor    %xmm2, %xmm2
-; SSE2-NEXT:  pxor    %xmm3, %xmm3
-; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
-; SSE2-NEXT:  movdqa  %xmm3, %xmm4
-; SSE2-NEXT:  pandn   %xmm0, %xmm4
-; SSE2-NEXT:  psllw   $4, %xmm0
-; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
-; SSE2-NEXT:  pand    %xmm3, %xmm0
-; SSE2-NEXT:  por     %xmm4, %xmm0
-; SSE2-NEXT:  paddb   %xmm1, %xmm1
-; SSE2-NEXT:  pxor    %xmm3, %xmm3
-; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
-; SSE2-NEXT:  movdqa  %xmm3, %xmm4
-; SSE2-NEXT:  pandn   %xmm0, %xmm4
-; SSE2-NEXT:  psllw   $2, %xmm0
-; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
-; SSE2-NEXT:  pand    %xmm3, %xmm0
-; SSE2-NEXT:  por     %xmm4, %xmm0
-; SSE2-NEXT:  paddb   %xmm1, %xmm1
-; SSE2-NEXT:  pcmpgtb %xmm1, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm1
-; SSE2-NEXT:  pandn   %xmm0, %xmm1
-; SSE2-NEXT:  paddb   %xmm0, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm1, %xmm0
-; SSE2-NEXT:  retq
-;
-; SSE41:      movdqa   %xmm0, %xmm2
-; SSE41-NEXT: psllw    $5, %xmm1
-; SSE41-NEXT: movdqa   %xmm2, %xmm3
-; SSE41-NEXT: psllw    $4, %xmm3
-; SSE41-NEXT: pand     {{.*}}(%rip), %xmm3
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: pblendvb %xmm3, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm3
-; SSE41-NEXT: psllw    $2, %xmm3
-; SSE41-NEXT: pand     {{.*}}(%rip), %xmm3
-; SSE41-NEXT: paddb    %xmm1, %xmm1
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: pblendvb %xmm3, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm3
-; SSE41-NEXT: paddb    %xmm3, %xmm3
-; SSE41-NEXT: paddb    %xmm1, %xmm1
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: pblendvb %xmm3, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX:        vpsllw    $5, %xmm1, %xmm1
-; AVX-NEXT:   vpsllw    $4, %xmm0, %xmm2
-; AVX-NEXT:   vpand     {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT:   vpsllw    $2, %xmm0, %xmm2
-; AVX-NEXT:   vpand     {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT:   vpaddb    %xmm1, %xmm1, %xmm1
-; AVX-NEXT:   vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT:   vpaddb    %xmm0, %xmm0, %xmm2
-; AVX-NEXT:   vpaddb    %xmm1, %xmm1, %xmm1
-; AVX-NEXT:   vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT:   retq
-  %shl = shl <16 x i8> %r, %a
-  %tmp2 = bitcast <16 x i8> %shl to <2 x i64>
-  ret <2 x i64> %tmp2
-}
-
-define <2 x i64> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp {
-entry:
-; ALL-NOT: sarw
-;
-; SSE2:       psllw   $12, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psraw   $8, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psraw   $4, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psraw   $2, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  psraw   $15, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  pandn   %xmm0, %xmm2
-; SSE2-NEXT:  psraw   $1, %xmm0
-; SSE2-NEXT:  pand    %xmm1, %xmm0
-; SSE2-NEXT:  por     %xmm2, %xmm0
-; SSE2-NEXT:  retq
-;
-; SSE41:      movdqa    %xmm0, %xmm2
-; SSE41-NEXT: movdqa    %xmm1, %xmm0
-; SSE41-NEXT: psllw     $12, %xmm0
-; SSE41-NEXT: psllw     $4, %xmm1
-; SSE41-NEXT: por       %xmm0, %xmm1
-; SSE41-NEXT: movdqa    %xmm1, %xmm3
-; SSE41-NEXT: paddw     %xmm3, %xmm3
-; SSE41-NEXT: movdqa    %xmm2, %xmm4
-; SSE41-NEXT: psraw     $8, %xmm4
-; SSE41-NEXT: movdqa    %xmm1, %xmm0
-; SSE41-NEXT: pblendvb  %xmm4, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm1
-; SSE41-NEXT: psraw     $4, %xmm1
-; SSE41-NEXT: movdqa    %xmm3, %xmm0
-; SSE41-NEXT: pblendvb  %xmm1, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm1
-; SSE41-NEXT: psraw     $2, %xmm1
-; SSE41-NEXT: paddw     %xmm3, %xmm3
-; SSE41-NEXT: movdqa    %xmm3, %xmm0
-; SSE41-NEXT: pblendvb  %xmm1, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm1
-; SSE41-NEXT: psraw     $1, %xmm1
-; SSE41-NEXT: paddw     %xmm3, %xmm3
-; SSE41-NEXT: movdqa    %xmm3, %xmm0
-; SSE41-NEXT: pblendvb  %xmm1, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX:        vpsllw    $12, %xmm1, %xmm2
-; AVX-NEXT:   vpsllw    $4, %xmm1, %xmm1
-; AVX-NEXT:   vpor      %xmm2, %xmm1, %xmm1
-; AVX-NEXT:   vpaddw    %xmm1, %xmm1, %xmm2
-; AVX-NEXT:   vpsraw    $8, %xmm0, %xmm3
-; AVX-NEXT:   vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX-NEXT:   vpsraw    $4, %xmm0, %xmm1
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   vpsraw    $2, %xmm0, %xmm1
-; AVX-NEXT:   vpaddw    %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   vpsraw    $1, %xmm0, %xmm1
-; AVX-NEXT:   vpaddw    %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   retq
-  %ashr = ashr <8 x i16> %r, %a
-  %tmp2 = bitcast <8 x i16> %ashr to <2 x i64>
-  ret <2 x i64> %tmp2
-}
-
-define <2 x i64> @ashr_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
-entry:
-; ALL-NOT: sarb
-;
-; SSE2:       punpckhbw {{.*#}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE2-NEXT:  psllw     $5, %xmm1
-; SSE2-NEXT:  punpckhbw {{.*#}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; SSE2-NEXT:  pxor      %xmm3, %xmm3
-; SSE2-NEXT:  pxor      %xmm5, %xmm5
-; SSE2-NEXT:  pcmpgtw   %xmm4, %xmm5
-; SSE2-NEXT:  movdqa    %xmm5, %xmm6
-; SSE2-NEXT:  pandn     %xmm2, %xmm6
-; SSE2-NEXT:  psraw     $4, %xmm2
-; SSE2-NEXT:  pand      %xmm5, %xmm2
-; SSE2-NEXT:  por       %xmm6, %xmm2
-; SSE2-NEXT:  paddw     %xmm4, %xmm4
-; SSE2-NEXT:  pxor      %xmm5, %xmm5
-; SSE2-NEXT:  pcmpgtw   %xmm4, %xmm5
-; SSE2-NEXT:  movdqa    %xmm5, %xmm6
-; SSE2-NEXT:  pandn     %xmm2, %xmm6
-; SSE2-NEXT:  psraw     $2, %xmm2
-; SSE2-NEXT:  pand      %xmm5, %xmm2
-; SSE2-NEXT:  por       %xmm6, %xmm2
-; SSE2-NEXT:  paddw     %xmm4, %xmm4
-; SSE2-NEXT:  pxor      %xmm5, %xmm5
-; SSE2-NEXT:  pcmpgtw   %xmm4, %xmm5
-; SSE2-NEXT:  movdqa    %xmm5, %xmm4
-; SSE2-NEXT:  pandn     %xmm2, %xmm4
-; SSE2-NEXT:  psraw     $1, %xmm2
-; SSE2-NEXT:  pand      %xmm5, %xmm2
-; SSE2-NEXT:  por       %xmm4, %xmm2
-; SSE2-NEXT:  psrlw     $8, %xmm2
-; SSE2-NEXT:  punpcklbw {{.*#}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:  punpcklbw {{.*#}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:  pxor      %xmm4, %xmm4
-; SSE2-NEXT:  pcmpgtw   %xmm1, %xmm4
-; SSE2-NEXT:  movdqa    %xmm4, %xmm5
-; SSE2-NEXT:  pandn     %xmm0, %xmm5
-; SSE2-NEXT:  psraw     $4, %xmm0
-; SSE2-NEXT:  pand      %xmm4, %xmm0
-; SSE2-NEXT:  por       %xmm5, %xmm0
-; SSE2-NEXT:  paddw     %xmm1, %xmm1
-; SSE2-NEXT:  pxor      %xmm4, %xmm4
-; SSE2-NEXT:  pcmpgtw   %xmm1, %xmm4
-; SSE2-NEXT:  movdqa    %xmm4, %xmm5
-; SSE2-NEXT:  pandn     %xmm0, %xmm5
-; SSE2-NEXT:  psraw     $2, %xmm0
-; SSE2-NEXT:  pand      %xmm4, %xmm0
-; SSE2-NEXT:  por       %xmm5, %xmm0
-; SSE2-NEXT:  paddw     %xmm1, %xmm1
-; SSE2-NEXT:  pcmpgtw   %xmm1, %xmm3
-; SSE2-NEXT:  movdqa    %xmm3, %xmm1
-; SSE2-NEXT:  pandn     %xmm0, %xmm1
-; SSE2-NEXT:  psraw     $1, %xmm0
-; SSE2-NEXT:  pand      %xmm3, %xmm0
-; SSE2-NEXT:  por       %xmm1, %xmm0
-; SSE2-NEXT:  psrlw     $8, %xmm0
-; SSE2-NEXT:  packuswb  %xmm2, %xmm0
-; SSE2-NEXT:  retq
-;
-; SSE41:      movdqa    %xmm0, %xmm2
-; SSE41-NEXT: psllw     $5, %xmm1
-; SSE41-NEXT: punpckhbw {{.*#}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; SSE41-NEXT: punpckhbw {{.*#}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
-; SSE41-NEXT: movdqa    %xmm3, %xmm4
-; SSE41-NEXT: psraw     $4, %xmm4
-; SSE41-NEXT: pblendvb  %xmm4, %xmm3
-; SSE41-NEXT: movdqa    %xmm3, %xmm4
-; SSE41-NEXT: psraw     $2, %xmm4
-; SSE41-NEXT: paddw     %xmm0, %xmm0
-; SSE41-NEXT: pblendvb  %xmm4, %xmm3
-; SSE41-NEXT: movdqa    %xmm3, %xmm4
-; SSE41-NEXT: psraw     $1, %xmm4
-; SSE41-NEXT: paddw     %xmm0, %xmm0
-; SSE41-NEXT: pblendvb  %xmm4, %xmm3
-; SSE41-NEXT: psrlw     $8, %xmm3
-; SSE41-NEXT: punpcklbw {{.*#}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE41-NEXT: punpcklbw {{.*#}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE41-NEXT: movdqa    %xmm1, %xmm2
-; SSE41-NEXT: psraw     $4, %xmm2
-; SSE41-NEXT: pblendvb  %xmm2, %xmm1
-; SSE41-NEXT: movdqa    %xmm1, %xmm2
-; SSE41-NEXT: psraw     $2, %xmm2
-; SSE41-NEXT: paddw     %xmm0, %xmm0
-; SSE41-NEXT: pblendvb  %xmm2, %xmm1
-; SSE41-NEXT: movdqa    %xmm1, %xmm2
-; SSE41-NEXT: psraw     $1, %xmm2
-; SSE41-NEXT: paddw     %xmm0, %xmm0
-; SSE41-NEXT: pblendvb  %xmm2, %xmm1
-; SSE41-NEXT: psrlw     $8, %xmm1
-; SSE41-NEXT: packuswb  %xmm3, %xmm1
-; SSE41-NEXT: movdqa    %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX:        vpsllw     $5, %xmm1, %xmm1
-; AVX-NEXT:   vpunpckhbw {{.*#}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX-NEXT:   vpunpckhbw {{.*#}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX-NEXT:   vpsraw     $4, %xmm3, %xmm4
-; AVX-NEXT:   vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
-; AVX-NEXT:   vpsraw     $2, %xmm3, %xmm4
-; AVX-NEXT:   vpaddw     %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
-; AVX-NEXT:   vpsraw     $1, %xmm3, %xmm4
-; AVX-NEXT:   vpaddw     %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb  %xmm2, %xmm4, %xmm3, %xmm2
-; AVX-NEXT:   vpsrlw     $8, %xmm2, %xmm2
-; AVX-NEXT:   vpunpcklbw {{.*#}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX-NEXT:   vpunpcklbw {{.*#}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX-NEXT:   vpsraw     $4, %xmm0, %xmm3
-; AVX-NEXT:   vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
-; AVX-NEXT:   vpsraw     $2, %xmm0, %xmm3
-; AVX-NEXT:   vpaddw     %xmm1, %xmm1, %xmm1
-; AVX-NEXT:   vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
-; AVX-NEXT:   vpsraw     $1, %xmm0, %xmm3
-; AVX-NEXT:   vpaddw     %xmm1, %xmm1, %xmm1
-; AVX-NEXT:   vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
-; AVX-NEXT:   vpsrlw     $8, %xmm0, %xmm0
-; AVX-NEXT:   vpackuswb  %xmm2, %xmm0, %xmm0
-; AVX-NEXT:   retq
-  %ashr = ashr <16 x i8> %r, %a
-  %tmp2 = bitcast <16 x i8> %ashr to <2 x i64>
-  ret <2 x i64> %tmp2
-}
-
-define <2 x i64> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind readnone ssp {
-entry:
-; ALL-NOT: shrl
-;
-; SSE2:       psllw   $12, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psrlw   $8, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psrlw   $4, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  psraw   $15, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm3
-; SSE2-NEXT:  pandn   %xmm0, %xmm3
-; SSE2-NEXT:  psrlw   $2, %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm3, %xmm0
-; SSE2-NEXT:  paddw   %xmm1, %xmm1
-; SSE2-NEXT:  psraw   $15, %xmm1
-; SSE2-NEXT:  movdqa  %xmm1, %xmm2
-; SSE2-NEXT:  pandn   %xmm0, %xmm2
-; SSE2-NEXT:  psrlw   $1, %xmm0
-; SSE2-NEXT:  pand    %xmm1, %xmm0
-; SSE2-NEXT:  por     %xmm2, %xmm0
-; SSE2-NEXT:  retq
-;
-; SSE41:      movdqa    %xmm0, %xmm2
-; SSE41-NEXT: movdqa    %xmm1, %xmm0
-; SSE41-NEXT: psllw     $12, %xmm0
-; SSE41-NEXT: psllw     $4, %xmm1
-; SSE41-NEXT: por       %xmm0, %xmm1
-; SSE41-NEXT: movdqa    %xmm1, %xmm3
-; SSE41-NEXT: paddw     %xmm3, %xmm3
-; SSE41-NEXT: movdqa    %xmm2, %xmm4
-; SSE41-NEXT: psrlw     $8, %xmm4
-; SSE41-NEXT: movdqa    %xmm1, %xmm0
-; SSE41-NEXT: pblendvb  %xmm4, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm1
-; SSE41-NEXT: psrlw     $4, %xmm1
-; SSE41-NEXT: movdqa    %xmm3, %xmm0
-; SSE41-NEXT: pblendvb  %xmm1, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm1
-; SSE41-NEXT: psrlw     $2, %xmm1
-; SSE41-NEXT: paddw     %xmm3, %xmm3
-; SSE41-NEXT: movdqa    %xmm3, %xmm0
-; SSE41-NEXT: pblendvb  %xmm1, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm1
-; SSE41-NEXT: psrlw     $1, %xmm1
-; SSE41-NEXT: paddw     %xmm3, %xmm3
-; SSE41-NEXT: movdqa    %xmm3, %xmm0
-; SSE41-NEXT: pblendvb  %xmm1, %xmm2
-; SSE41-NEXT: movdqa    %xmm2, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX:        vpsllw    $12, %xmm1, %xmm2
-; AVX-NEXT:   vpsllw    $4, %xmm1, %xmm1
-; AVX-NEXT:   vpor      %xmm2, %xmm1, %xmm1
-; AVX-NEXT:   vpaddw    %xmm1, %xmm1, %xmm2
-; AVX-NEXT:   vpsrlw    $8, %xmm0, %xmm3
-; AVX-NEXT:   vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; AVX-NEXT:   vpsrlw    $4, %xmm0, %xmm1
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   vpsrlw    $2, %xmm0, %xmm1
-; AVX-NEXT:   vpaddw    %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   vpsrlw    $1, %xmm0, %xmm1
-; AVX-NEXT:   vpaddw    %xmm2, %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
-; AVX-NEXT:   retq
-  %lshr = lshr <8 x i16> %r, %a
-  %tmp2 = bitcast <8 x i16> %lshr to <2 x i64>
-  ret <2 x i64> %tmp2
-}
-
-define <2 x i64> @lshr_16i8(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
-entry:
-; ALL-NOT: shrb
-;
-; SSE2:       psllw   $5, %xmm1
-; SSE2-NEXT:  pxor    %xmm2, %xmm2
-; SSE2-NEXT:  pxor    %xmm3, %xmm3
-; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
-; SSE2-NEXT:  movdqa  %xmm3, %xmm4
-; SSE2-NEXT:  pandn   %xmm0, %xmm4
-; SSE2-NEXT:  psrlw   $4, %xmm0
-; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
-; SSE2-NEXT:  pand    %xmm3, %xmm0
-; SSE2-NEXT:  por     %xmm4, %xmm0
-; SSE2-NEXT:  paddb   %xmm1, %xmm1
-; SSE2-NEXT:  pxor    %xmm3, %xmm3
-; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
-; SSE2-NEXT:  movdqa  %xmm3, %xmm4
-; SSE2-NEXT:  pandn   %xmm0, %xmm4
-; SSE2-NEXT:  psrlw   $2, %xmm0
-; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
-; SSE2-NEXT:  pand    %xmm3, %xmm0
-; SSE2-NEXT:  por     %xmm4, %xmm0
-; SSE2-NEXT:  paddb   %xmm1, %xmm1
-; SSE2-NEXT:  pcmpgtb %xmm1, %xmm2
-; SSE2-NEXT:  movdqa  %xmm2, %xmm1
-; SSE2-NEXT:  pandn   %xmm0, %xmm1
-; SSE2-NEXT:  psrlw   $1, %xmm0
-; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
-; SSE2-NEXT:  pand    %xmm2, %xmm0
-; SSE2-NEXT:  por     %xmm1, %xmm0
-; SSE2-NEXT:  retq
-;
-; SSE41:      movdqa   %xmm0, %xmm2
-; SSE41-NEXT: psllw    $5, %xmm1
-; SSE41-NEXT: movdqa   %xmm2, %xmm3
-; SSE41-NEXT: psrlw    $4, %xmm3
-; SSE41-NEXT: pand     {{.*}}(%rip), %xmm3
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: pblendvb %xmm3, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm3
-; SSE41-NEXT: psrlw    $2, %xmm3
-; SSE41-NEXT: pand     {{.*}}(%rip), %xmm3
-; SSE41-NEXT: paddb    %xmm1, %xmm1
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: pblendvb %xmm3, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm3
-; SSE41-NEXT: psrlw    $1, %xmm3
-; SSE41-NEXT: pand     {{.*}}(%rip), %xmm3
-; SSE41-NEXT: paddb    %xmm1, %xmm1
-; SSE41-NEXT: movdqa   %xmm1, %xmm0
-; SSE41-NEXT: pblendvb %xmm3, %xmm2
-; SSE41-NEXT: movdqa   %xmm2, %xmm0
-; SSE41-NEXT: retq
-;
-; AVX:        vpsllw    $5, %xmm1, %xmm1
-; AVX-NEXT:   vpsrlw    $4, %xmm0, %xmm2
-; AVX-NEXT:   vpand     {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT:   vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT:   vpsrlw    $2, %xmm0, %xmm2
-; AVX-NEXT:   vpand     {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT:   vpaddb    %xmm1, %xmm1, %xmm1
-; AVX-NEXT:   vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT:   vpsrlw    $1, %xmm0, %xmm2
-; AVX-NEXT:   vpand     {{.*}}(%rip), %xmm2, %xmm2
-; AVX-NEXT:   vpaddb    %xmm1, %xmm1, %xmm1
-; AVX-NEXT:   vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
-; AVX-NEXT:   retq
-  %lshr = lshr <16 x i8> %r, %a
-  %tmp2 = bitcast <16 x i8> %lshr to <2 x i64>
-  ret <2 x i64> %tmp2
-}
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index e6acc7e..aafc05b 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -117,6 +117,46 @@ entry:
   ret <4 x i64>%B
 }
 
+define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_2i8_to_i32:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    psraw $8, %xmm0
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: sext_2i8_to_i32:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    psraw $8, %xmm0
+; SSSE3-NEXT:    movd %xmm0, %eax
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: sext_2i8_to_i32:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
+; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: sext_2i8_to_i32:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    retq
+;
+; X32-SSE41-LABEL: sext_2i8_to_i32:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41:         pmovsxbw %xmm0, %xmm0
+; X32-SSE41-NEXT:    movd %xmm0, %eax
+; X32-SSE41-NEXT:    popl %edx
+; X32-SSE41-NEXT:    retl
+entry:
+  %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %Ex = sext <2 x i8> %Shuf to <2 x i16>
+  %Bc = bitcast <2 x i16> %Ex to i32
+  ret i32 %Bc
+}
+
 define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
 ; SSE2-LABEL: load_sext_test1:
 ; SSE2:       # BB#0: # %entry
diff --git a/test/CodeGen/X86/vector-shift-ashr-128.ll b/test/CodeGen/X86/vector-shift-ashr-128.ll
new file mode 100644
index 0000000..4fd2f8b
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -0,0 +1,1041 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+;
+; Variable Shifts
+;
+
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: var_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    movd       %xmm1, %rcx
+; SSE2-NEXT:    sarq       %cl, %rax
+; SSE2-NEXT:    movd       %rax, %xmm2
+; SSE2-NEXT:    pshufd     {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    pshufd     {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movd       %xmm0, %rcx
+; SSE2-NEXT:    sarq       %cl, %rax
+; SSE2-NEXT:    movd       %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT:    movdqa     %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrq     $1, %xmm0, %rax
+; SSE41-NEXT:    pextrq     $1, %xmm1, %rcx
+; SSE41-NEXT:    sarq       %cl, %rax
+; SSE41-NEXT:    movd       %rax, %xmm2
+; SSE41-NEXT:    movd       %xmm0, %rax
+; SSE41-NEXT:    movd       %xmm1, %rcx
+; SSE41-NEXT:    sarq       %cl, %rax
+; SSE41-NEXT:    movd       %rax, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: var_shift_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpextrq     $1, %xmm0, %rax
+; AVX-NEXT:    vpextrq     $1, %xmm1, %rcx
+; AVX-NEXT:    sarq        %cl, %rax
+; AVX-NEXT:    vmovq       %rax, %xmm2
+; AVX-NEXT:    vmovq       %xmm0, %rax
+; AVX-NEXT:    vmovq       %xmm1, %rcx
+; AVX-NEXT:    sarq        %cl, %rax
+; AVX-NEXT:    vmovq       %rax, %xmm0
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX-NEXT:    retq
+  %shift = ashr <2 x i64> %a, %b
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: var_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; SSE2-NEXT:    movd      %xmm2, %eax
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT:    movd      %xmm2, %ecx
+; SSE2-NEXT:    sarl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm2
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; SSE2-NEXT:    movd      %xmm3, %eax
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; SSE2-NEXT:    movd      %xmm3, %ecx
+; SSE2-NEXT:    sarl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm3
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    movd      %xmm1, %ecx
+; SSE2-NEXT:    sarl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm2
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movd      %xmm0, %ecx
+; SSE2-NEXT:    sarl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm0
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT:    movdqa     %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrd $1, %xmm0, %eax
+; SSE41-NEXT:    pextrd $1, %xmm1, %ecx
+; SSE41-NEXT:    sarl   %cl, %eax
+; SSE41-NEXT:    movd   %xmm0, %edx
+; SSE41-NEXT:    movd   %xmm1, %ecx
+; SSE41-NEXT:    sarl   %cl, %edx
+; SSE41-NEXT:    movd   %edx, %xmm2
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm2
+; SSE41-NEXT:    pextrd $2, %xmm0, %eax
+; SSE41-NEXT:    pextrd $2, %xmm1, %ecx
+; SSE41-NEXT:    sarl   %cl, %eax
+; SSE41-NEXT:    pinsrd $2, %eax, %xmm2
+; SSE41-NEXT:    pextrd $3, %xmm0, %eax
+; SSE41-NEXT:    pextrd $3, %xmm1, %ecx
+; SSE41-NEXT:    sarl   %cl, %eax
+; SSE41-NEXT:    pinsrd $3, %eax, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $1, %xmm1, %ecx
+; AVX1-NEXT:    sarl    %cl, %eax
+; AVX1-NEXT:    vmovd   %xmm0, %edx
+; AVX1-NEXT:    vmovd   %xmm1, %ecx
+; AVX1-NEXT:    sarl    %cl, %edx
+; AVX1-NEXT:    vmovd   %edx, %xmm2
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT:    sarl    %cl, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT:    sarl    %cl, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = ashr <4 x i32> %a, %b
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: var_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psllw  $12, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psraw  $8, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psraw  $4, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psraw  $2, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    psraw  $15, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pandn  %xmm0, %xmm2
+; SSE2-NEXT:    psraw  $1, %xmm0
+; SSE2-NEXT:    pand   %xmm1, %xmm0
+; SSE2-NEXT:    por    %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm2
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    psllw    $12, %xmm0
+; SSE41-NEXT:    psllw    $4, %xmm1
+; SSE41-NEXT:    por      %xmm0, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm3
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm2, %xmm4
+; SSE41-NEXT:    psraw    $8, %xmm4
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm4, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psraw    $4, %xmm1
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psraw    $2, %xmm1
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psraw    $1, %xmm1
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v8i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsllw    $12, %xmm1, %xmm2
+; AVX1-NEXT:    vpsllw    $4, %xmm1, %xmm1
+; AVX1-NEXT:    vpor      %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpsraw    $8, %xmm0, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw    $4, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw    $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw    $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v8i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpsravd   %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb   {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT:    vpermq    {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %shift = ashr <8 x i16> %a, %b
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: var_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    psllw     $5, %xmm1
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
+; SSE2-NEXT:    pxor      %xmm3, %xmm3
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm6
+; SSE2-NEXT:    pandn     %xmm2, %xmm6
+; SSE2-NEXT:    psraw     $4, %xmm2
+; SSE2-NEXT:    pand      %xmm5, %xmm2
+; SSE2-NEXT:    por       %xmm6, %xmm2
+; SSE2-NEXT:    paddw     %xmm4, %xmm4
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm6
+; SSE2-NEXT:    pandn     %xmm2, %xmm6
+; SSE2-NEXT:    psraw     $2, %xmm2
+; SSE2-NEXT:    pand      %xmm5, %xmm2
+; SSE2-NEXT:    por       %xmm6, %xmm2
+; SSE2-NEXT:    paddw     %xmm4, %xmm4
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm4
+; SSE2-NEXT:    pandn     %xmm2, %xmm4
+; SSE2-NEXT:    psraw     $1, %xmm2
+; SSE2-NEXT:    pand      %xmm5, %xmm2
+; SSE2-NEXT:    por       %xmm4, %xmm2
+; SSE2-NEXT:    psrlw     $8, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pxor      %xmm4, %xmm4
+; SSE2-NEXT:    pcmpgtw   %xmm1, %xmm4
+; SSE2-NEXT:    movdqa    %xmm4, %xmm5
+; SSE2-NEXT:    pandn     %xmm0, %xmm5
+; SSE2-NEXT:    psraw     $4, %xmm0
+; SSE2-NEXT:    pand      %xmm4, %xmm0
+; SSE2-NEXT:    por       %xmm5, %xmm0
+; SSE2-NEXT:    paddw     %xmm1, %xmm1
+; SSE2-NEXT:    pxor      %xmm4, %xmm4
+; SSE2-NEXT:    pcmpgtw   %xmm1, %xmm4
+; SSE2-NEXT:    movdqa    %xmm4, %xmm5
+; SSE2-NEXT:    pandn     %xmm0, %xmm5
+; SSE2-NEXT:    psraw     $2, %xmm0
+; SSE2-NEXT:    pand      %xmm4, %xmm0
+; SSE2-NEXT:    por       %xmm5, %xmm0
+; SSE2-NEXT:    paddw     %xmm1, %xmm1
+; SSE2-NEXT:    pcmpgtw   %xmm1, %xmm3
+; SSE2-NEXT:    movdqa    %xmm3, %xmm1
+; SSE2-NEXT:    pandn     %xmm0, %xmm1
+; SSE2-NEXT:    psraw     $1, %xmm0
+; SSE2-NEXT:    pand      %xmm3, %xmm0
+; SSE2-NEXT:    por       %xmm1, %xmm0
+; SSE2-NEXT:    psrlw     $8, %xmm0
+; SSE2-NEXT:    packuswb  %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa    %xmm0, %xmm2
+; SSE41-NEXT:    psllw     $5, %xmm1
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; SSE41-NEXT:    movdqa    %xmm3, %xmm4
+; SSE41-NEXT:    psraw     $4, %xmm4
+; SSE41-NEXT:    pblendvb  %xmm4, %xmm3
+; SSE41-NEXT:    movdqa    %xmm3, %xmm4
+; SSE41-NEXT:    psraw     $2, %xmm4
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm4, %xmm3
+; SSE41-NEXT:    movdqa    %xmm3, %xmm4
+; SSE41-NEXT:    psraw     $1, %xmm4
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm4, %xmm3
+; SSE41-NEXT:    psrlw     $8, %xmm3
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT:    movdqa    %xmm1, %xmm2
+; SSE41-NEXT:    psraw     $4, %xmm2
+; SSE41-NEXT:    pblendvb  %xmm2, %xmm1
+; SSE41-NEXT:    movdqa    %xmm1, %xmm2
+; SSE41-NEXT:    psraw     $2, %xmm2
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm2, %xmm1
+; SSE41-NEXT:    movdqa    %xmm1, %xmm2
+; SSE41-NEXT:    psraw     $1, %xmm2
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm2, %xmm1
+; SSE41-NEXT:    psrlw     $8, %xmm1
+; SSE41-NEXT:    packuswb  %xmm3, %xmm1
+; SSE41-NEXT:    movdqa    %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: var_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsllw     $5, %xmm1, %xmm1
+; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX-NEXT:    vpsraw     $4, %xmm3, %xmm4
+; AVX-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT:    vpsraw     $2, %xmm3, %xmm4
+; AVX-NEXT:    vpaddw     %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT:    vpsraw     $1, %xmm3, %xmm4
+; AVX-NEXT:    vpaddw     %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm2
+; AVX-NEXT:    vpsrlw     $8, %xmm2, %xmm2
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX-NEXT:    vpsraw     $4, %xmm0, %xmm3
+; AVX-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpsraw     $2, %xmm0, %xmm3
+; AVX-NEXT:    vpaddw     %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpsraw     $1, %xmm0, %xmm3
+; AVX-NEXT:    vpaddw     %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw     $8, %xmm0, %xmm0
+; AVX-NEXT:    vpackuswb  %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = ashr <16 x i8> %a, %b
+  ret <16 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: splatvar_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd     {{.*#+}} xmm2 = xmm1[0,1,0,1]
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    movd       %xmm2, %rcx
+; SSE2-NEXT:    sarq       %cl, %rax
+; SSE2-NEXT:    movd       %rax, %xmm1
+; SSE2-NEXT:    pshufd     {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    pshufd     {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movd       %xmm0, %rcx
+; SSE2-NEXT:    sarq       %cl, %rax
+; SSE2-NEXT:    movd       %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    movdqa     %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd     {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE41-NEXT:    pextrq     $1, %xmm0, %rax
+; SSE41-NEXT:    pextrq     $1, %xmm1, %rcx
+; SSE41-NEXT:    sarq       %cl, %rax
+; SSE41-NEXT:    movd       %rax, %xmm2
+; SSE41-NEXT:    movd       %xmm0, %rax
+; SSE41-NEXT:    movd       %xmm1, %rcx
+; SSE41-NEXT:    sarq       %cl, %rax
+; SSE41-NEXT:    movd       %rax, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: splatvar_shift_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd     {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT:    vpextrq     $1, %xmm0, %rax
+; AVX1-NEXT:    vpextrq     $1, %xmm1, %rcx
+; AVX1-NEXT:    sarq        %cl, %rax
+; AVX1-NEXT:    vmovq       %rax, %xmm2
+; AVX1-NEXT:    vmovq       %xmm0, %rax
+; AVX1-NEXT:    vmovq       %xmm1, %rcx
+; AVX1-NEXT:    sarq        %cl, %rax
+; AVX1-NEXT:    vmovq       %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT:    vpextrq      $1, %xmm0, %rax
+; AVX2-NEXT:    vpextrq      $1, %xmm1, %rcx
+; AVX2-NEXT:    sarq         %cl, %rax
+; AVX2-NEXT:    vmovq        %rax, %xmm2
+; AVX2-NEXT:    vmovq        %xmm0, %rax
+; AVX2-NEXT:    vmovq        %xmm1, %rcx
+; AVX2-NEXT:    sarq         %cl, %rax
+; AVX2-NEXT:    vmovq        %rax, %xmm0
+; AVX2-NEXT:    vpunpcklqdq  {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT:    retq
+  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+  %shift = ashr <2 x i64> %a, %splat
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: splatvar_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm2, %xmm2
+; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSE2-NEXT:    psrad %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; SSE41-NEXT:    psrad %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shift = ashr <4 x i32> %a, %splat
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: splatvar_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd   %xmm1, %eax
+; SSE2-NEXT:    movzwl %ax, %eax
+; SSE2-NEXT:    movd   %eax, %xmm1
+; SSE2-NEXT:    psraw  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; SSE41-NEXT:    psraw %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %shift = ashr <8 x i16> %a, %splat
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: splatvar_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT:    pshuflw   {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw   {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSE2-NEXT:    psllw     $5, %xmm3
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
+; SSE2-NEXT:    pxor      %xmm2, %xmm2
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm6
+; SSE2-NEXT:    pandn     %xmm1, %xmm6
+; SSE2-NEXT:    psraw     $4, %xmm1
+; SSE2-NEXT:    pand      %xmm5, %xmm1
+; SSE2-NEXT:    por       %xmm6, %xmm1
+; SSE2-NEXT:    paddw     %xmm4, %xmm4
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm6
+; SSE2-NEXT:    pandn     %xmm1, %xmm6
+; SSE2-NEXT:    psraw     $2, %xmm1
+; SSE2-NEXT:    pand      %xmm5, %xmm1
+; SSE2-NEXT:    por       %xmm6, %xmm1
+; SSE2-NEXT:    paddw     %xmm4, %xmm4
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm4
+; SSE2-NEXT:    pandn     %xmm1, %xmm4
+; SSE2-NEXT:    psraw     $1, %xmm1
+; SSE2-NEXT:    pand      %xmm5, %xmm1
+; SSE2-NEXT:    por       %xmm4, %xmm1
+; SSE2-NEXT:    psrlw     $8, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}}  xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklbw {{.*#+}}  xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pxor      %xmm4, %xmm4
+; SSE2-NEXT:    pcmpgtw   %xmm3, %xmm4
+; SSE2-NEXT:    movdqa    %xmm4, %xmm5
+; SSE2-NEXT:    pandn     %xmm0, %xmm5
+; SSE2-NEXT:    psraw     $4, %xmm0
+; SSE2-NEXT:    pand      %xmm4, %xmm0
+; SSE2-NEXT:    por       %xmm5, %xmm0
+; SSE2-NEXT:    paddw     %xmm3, %xmm3
+; SSE2-NEXT:    pxor      %xmm4, %xmm4
+; SSE2-NEXT:    pcmpgtw   %xmm3, %xmm4
+; SSE2-NEXT:    movdqa    %xmm4, %xmm5
+; SSE2-NEXT:    pandn     %xmm0, %xmm5
+; SSE2-NEXT:    psraw     $2, %xmm0
+; SSE2-NEXT:    pand      %xmm4, %xmm0
+; SSE2-NEXT:    por       %xmm5, %xmm0
+; SSE2-NEXT:    paddw     %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtw   %xmm3, %xmm2
+; SSE2-NEXT:    movdqa    %xmm2, %xmm3
+; SSE2-NEXT:    pandn     %xmm0, %xmm3
+; SSE2-NEXT:    psraw     $1, %xmm0
+; SSE2-NEXT:    pand      %xmm2, %xmm0
+; SSE2-NEXT:    por       %xmm3, %xmm0
+; SSE2-NEXT:    psrlw     $8, %xmm0
+; SSE2-NEXT:    packuswb  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa      %xmm0, %xmm2
+; SSE41-NEXT:    pxor        %xmm0, %xmm0
+; SSE41-NEXT:    pshufb      %xmm0, %xmm1
+; SSE41-NEXT:    psllw       $5, %xmm1
+; SSE41-NEXT:    punpckhbw   {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE41-NEXT:    punpckhbw   {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; SSE41-NEXT:    movdqa      %xmm3, %xmm4
+; SSE41-NEXT:    psraw       $4, %xmm4
+; SSE41-NEXT:    pblendvb    %xmm4, %xmm3
+; SSE41-NEXT:    movdqa      %xmm3, %xmm4
+; SSE41-NEXT:    psraw       $2, %xmm4
+; SSE41-NEXT:    paddw       %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb    %xmm4, %xmm3
+; SSE41-NEXT:    movdqa      %xmm3, %xmm4
+; SSE41-NEXT:    psraw       $1, %xmm4
+; SSE41-NEXT:    paddw       %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb    %xmm4, %xmm3
+; SSE41-NEXT:    psrlw       $8, %xmm3
+; SSE41-NEXT:    punpcklbw   {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE41-NEXT:    punpcklbw   {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT:    movdqa      %xmm1, %xmm2
+; SSE41-NEXT:    psraw       $4, %xmm2
+; SSE41-NEXT:    pblendvb    %xmm2, %xmm1
+; SSE41-NEXT:    movdqa      %xmm1, %xmm2
+; SSE41-NEXT:    psraw       $2, %xmm2
+; SSE41-NEXT:    paddw       %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb    %xmm2, %xmm1
+; SSE41-NEXT:    movdqa      %xmm1, %xmm2
+; SSE41-NEXT:    psraw       $1, %xmm2
+; SSE41-NEXT:    paddw       %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb    %xmm2, %xmm1
+; SSE41-NEXT:    psrlw       $8, %xmm1
+; SSE41-NEXT:    packuswb    %xmm3, %xmm1
+; SSE41-NEXT:    movdqa      %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: splatvar_shift_v16i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor      %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb    %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw     $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpsraw     $4, %xmm3, %xmm4
+; AVX1-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsraw     $2, %xmm3, %xmm4
+; AVX1-NEXT:    vpaddw     %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsraw     $1, %xmm3, %xmm4
+; AVX1-NEXT:    vpaddw     %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm2
+; AVX1-NEXT:    vpsrlw     $8, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpsraw     $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw     $2, %xmm0, %xmm3
+; AVX1-NEXT:    vpaddw     %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw     $1, %xmm0, %xmm3
+; AVX1-NEXT:    vpaddw     %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw     $8, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb  %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v16i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT:    vpsllw       $5, %xmm1, %xmm1
+; AVX2-NEXT:    vpunpckhbw   {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX2-NEXT:    vpunpckhbw   {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpsraw       $4, %xmm3, %xmm4
+; AVX2-NEXT:    vpblendvb    %xmm2, %xmm4, %xmm3, %xmm3
+; AVX2-NEXT:    vpsraw       $2, %xmm3, %xmm4
+; AVX2-NEXT:    vpaddw       %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendvb    %xmm2, %xmm4, %xmm3, %xmm3
+; AVX2-NEXT:    vpsraw       $1, %xmm3, %xmm4
+; AVX2-NEXT:    vpaddw       %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendvb    %xmm2, %xmm4, %xmm3, %xmm2
+; AVX2-NEXT:    vpsrlw       $8, %xmm2, %xmm2
+; AVX2-NEXT:    vpunpcklbw   {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2-NEXT:    vpunpcklbw   {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX2-NEXT:    vpsraw       $4, %xmm0, %xmm3
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    vpsraw       $2, %xmm0, %xmm3
+; AVX2-NEXT:    vpaddw       %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    vpsraw       $1, %xmm0, %xmm3
+; AVX2-NEXT:    vpaddw       %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrlw       $8, %xmm0, %xmm0
+; AVX2-NEXT:    vpackuswb    %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %shift = ashr <16 x i8> %a, %splat
+  ret <16 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+; SSE2-LABEL: constant_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    sarq       %rax
+; SSE2-NEXT:    movd       %rax, %xmm1
+; SSE2-NEXT:    pshufd     {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    sarq       $7, %rax
+; SSE2-NEXT:    movd       %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    movdqa     %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrq     $1, %xmm0, %rax
+; SSE41-NEXT:    sarq       $7, %rax
+; SSE41-NEXT:    movd       %rax, %xmm1
+; SSE41-NEXT:    movd       %xmm0, %rax
+; SSE41-NEXT:    sarq       %rax
+; SSE41-NEXT:    movd       %rax, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: constant_shift_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpextrq     $1, %xmm0, %rax
+; AVX-NEXT:    sarq        $7, %rax
+; AVX-NEXT:    vmovq       %rax, %xmm1
+; AVX-NEXT:    vmovq       %xmm0, %rax
+; AVX-NEXT:    sarq        %rax
+; AVX-NEXT:    vmovq       %rax, %xmm0
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %shift = ashr <2 x i64> %a, <i64 1, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+; SSE2-LABEL: constant_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE2-NEXT:    movd      %xmm1, %eax
+; SSE2-NEXT:    sarl      $7, %eax
+; SSE2-NEXT:    movd      %eax, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE2-NEXT:    movd      %xmm2, %eax
+; SSE2-NEXT:    sarl      $5, %eax
+; SSE2-NEXT:    movd      %eax, %xmm2
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    sarl      $4, %eax
+; SSE2-NEXT:    movd      %eax, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    sarl      $6, %eax
+; SSE2-NEXT:    movd      %eax, %xmm0
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT:    movdqa    %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrd $1, %xmm0, %eax
+; SSE41-NEXT:    sarl   $5, %eax
+; SSE41-NEXT:    movd   %xmm0, %ecx
+; SSE41-NEXT:    sarl   $4, %ecx
+; SSE41-NEXT:    movd   %ecx, %xmm1
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm1
+; SSE41-NEXT:    pextrd $2, %xmm0, %eax
+; SSE41-NEXT:    sarl   $6, %eax
+; SSE41-NEXT:    pinsrd $2, %eax, %xmm1
+; SSE41-NEXT:    pextrd $3, %xmm0, %eax
+; SSE41-NEXT:    sarl   $7, %eax
+; SSE41-NEXT:    pinsrd $3, %eax, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_shift_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    sarl    $5, %eax
+; AVX1-NEXT:    vmovd   %xmm0, %ecx
+; AVX1-NEXT:    sarl    $4, %ecx
+; AVX1-NEXT:    vmovd   %ecx, %xmm1
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    sarl    $6, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    sarl    $7, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+; SSE2-LABEL: constant_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa    %xmm0, %xmm1
+; SSE2-NEXT:    psraw     $4, %xmm1
+; SSE2-NEXT:    movsd     {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm1[0,2,2,3]
+; SSE2-NEXT:    psraw     $2, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}}  xmm0 = xmm1[1,3,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT:    movdqa    {{.*#+}} xmm0 = [65535,0,65535,0,65535,0,65535,0]
+; SSE2-NEXT:    movdqa    %xmm2, %xmm1
+; SSE2-NEXT:    pand      %xmm0, %xmm1
+; SSE2-NEXT:    psraw     $1, %xmm2
+; SSE2-NEXT:    pandn     %xmm2, %xmm0
+; SSE2-NEXT:    por       %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psraw    $8, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,4112,8224,12336,16448,20560,24672,28784]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psraw    $4, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,8224,16448,24672,32896,41120,49344,57568]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psraw    $2, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,16448,32896,49344,256,16704,33152,49600]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psraw    $1, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,32896,256,33152,512,33408,768,33664]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_shift_v8i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsraw    $8, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,4112,8224,12336,16448,20560,24672,28784]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw    $4, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,8224,16448,24672,32896,41120,49344,57568]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw    $2, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,16448,32896,49344,256,16704,33152,49600]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw    $1, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,32896,256,33152,512,33408,768,33664]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v8i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT:    vpsravd   %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb   {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT:    vpermq    {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+; SSE2-LABEL: constant_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSE2-NEXT:    movdqa    {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; SSE2-NEXT:    psllw     $5, %xmm3
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
+; SSE2-NEXT:    pxor      %xmm2, %xmm2
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm6
+; SSE2-NEXT:    pandn     %xmm1, %xmm6
+; SSE2-NEXT:    psraw     $4, %xmm1
+; SSE2-NEXT:    pand      %xmm5, %xmm1
+; SSE2-NEXT:    por       %xmm6, %xmm1
+; SSE2-NEXT:    paddw     %xmm4, %xmm4
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm6
+; SSE2-NEXT:    pandn     %xmm1, %xmm6
+; SSE2-NEXT:    psraw     $2, %xmm1
+; SSE2-NEXT:    pand      %xmm5, %xmm1
+; SSE2-NEXT:    por       %xmm6, %xmm1
+; SSE2-NEXT:    paddw     %xmm4, %xmm4
+; SSE2-NEXT:    pxor      %xmm5, %xmm5
+; SSE2-NEXT:    pcmpgtw   %xmm4, %xmm5
+; SSE2-NEXT:    movdqa    %xmm5, %xmm4
+; SSE2-NEXT:    pandn     %xmm1, %xmm4
+; SSE2-NEXT:    psraw     $1, %xmm1
+; SSE2-NEXT:    pand      %xmm5, %xmm1
+; SSE2-NEXT:    por       %xmm4, %xmm1
+; SSE2-NEXT:    psrlw     $8, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pxor      %xmm4, %xmm4
+; SSE2-NEXT:    pcmpgtw   %xmm3, %xmm4
+; SSE2-NEXT:    movdqa    %xmm4, %xmm5
+; SSE2-NEXT:    pandn     %xmm0, %xmm5
+; SSE2-NEXT:    psraw     $4, %xmm0
+; SSE2-NEXT:    pand      %xmm4, %xmm0
+; SSE2-NEXT:    por       %xmm5, %xmm0
+; SSE2-NEXT:    paddw     %xmm3, %xmm3
+; SSE2-NEXT:    pxor      %xmm4, %xmm4
+; SSE2-NEXT:    pcmpgtw   %xmm3, %xmm4
+; SSE2-NEXT:    movdqa    %xmm4, %xmm5
+; SSE2-NEXT:    pandn     %xmm0, %xmm5
+; SSE2-NEXT:    psraw     $2, %xmm0
+; SSE2-NEXT:    pand      %xmm4, %xmm0
+; SSE2-NEXT:    por       %xmm5, %xmm0
+; SSE2-NEXT:    paddw     %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtw   %xmm3, %xmm2
+; SSE2-NEXT:    movdqa    %xmm2, %xmm3
+; SSE2-NEXT:    pandn     %xmm0, %xmm3
+; SSE2-NEXT:    psraw     $1, %xmm0
+; SSE2-NEXT:    pand      %xmm2, %xmm0
+; SSE2-NEXT:    por       %xmm3, %xmm0
+; SSE2-NEXT:    psrlw     $8, %xmm0
+; SSE2-NEXT:    packuswb  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa    %xmm0, %xmm1
+; SSE41-NEXT:    movdqa    {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; SSE41-NEXT:    psllw     $5, %xmm3
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; SSE41-NEXT:    movdqa    %xmm2, %xmm4
+; SSE41-NEXT:    psraw     $4, %xmm4
+; SSE41-NEXT:    pblendvb  %xmm4, %xmm2
+; SSE41-NEXT:    movdqa    %xmm2, %xmm4
+; SSE41-NEXT:    psraw     $2, %xmm4
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm4, %xmm2
+; SSE41-NEXT:    movdqa    %xmm2, %xmm4
+; SSE41-NEXT:    psraw     $1, %xmm4
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm4, %xmm2
+; SSE41-NEXT:    psrlw     $8, %xmm2
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    movdqa    %xmm1, %xmm3
+; SSE41-NEXT:    psraw     $4, %xmm3
+; SSE41-NEXT:    pblendvb  %xmm3, %xmm1
+; SSE41-NEXT:    movdqa    %xmm1, %xmm3
+; SSE41-NEXT:    psraw     $2, %xmm3
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm3, %xmm1
+; SSE41-NEXT:    movdqa    %xmm1, %xmm3
+; SSE41-NEXT:    psraw     $1, %xmm3
+; SSE41-NEXT:    paddw     %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb  %xmm3, %xmm1
+; SSE41-NEXT:    psrlw     $8, %xmm1
+; SSE41-NEXT:    packuswb  %xmm2, %xmm1
+; SSE41-NEXT:    movdqa    %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: constant_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovdqa    {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX-NEXT:    vpsllw     $5, %xmm1, %xmm1
+; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX-NEXT:    vpsraw     $4, %xmm3, %xmm4
+; AVX-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT:    vpsraw     $2, %xmm3, %xmm4
+; AVX-NEXT:    vpaddw     %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm3
+; AVX-NEXT:    vpsraw     $1, %xmm3, %xmm4
+; AVX-NEXT:    vpaddw     %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb  %xmm2, %xmm4, %xmm3, %xmm2
+; AVX-NEXT:    vpsrlw     $8, %xmm2, %xmm2
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX-NEXT:    vpsraw     $4, %xmm0, %xmm3
+; AVX-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpsraw     $2, %xmm0, %xmm3
+; AVX-NEXT:    vpaddw     %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpsraw     $1, %xmm0, %xmm3
+; AVX-NEXT:    vpaddw     %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb  %xmm1, %xmm3, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw     $8, %xmm0, %xmm0
+; AVX-NEXT:    vpackuswb  %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <16 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+; SSE2-LABEL: splatconstant_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    sarq       $7, %rax
+; SSE2-NEXT:    movd       %rax, %xmm1
+; SSE2-NEXT:    pshufd     {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd       %xmm0, %rax
+; SSE2-NEXT:    sarq       $7, %rax
+; SSE2-NEXT:    movd       %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    movdqa     %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatconstant_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrq     $1, %xmm0, %rax
+; SSE41-NEXT:    sarq       $7, %rax
+; SSE41-NEXT:    movd       %rax, %xmm1
+; SSE41-NEXT:    movd       %xmm0, %rax
+; SSE41-NEXT:    sarq       $7, %rax
+; SSE41-NEXT:    movd       %rax, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpextrq     $1, %xmm0, %rax
+; AVX-NEXT:    sarq        $7, %rax
+; AVX-NEXT:    vmovq       %rax, %xmm1
+; AVX-NEXT:    vmovq       %xmm0, %rax
+; AVX-NEXT:    sarq        $7, %rax
+; AVX-NEXT:    vmovq       %rax, %xmm0
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+; SSE-LABEL: splatconstant_shift_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrad $5, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrad $5, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+; SSE-LABEL: splatconstant_shift_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    psraw $3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsraw $3, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+; SSE-LABEL: splatconstant_shift_v16i8:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlw  $3, %xmm0
+; SSE-NEXT:    pand   {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; SSE-NEXT:    pxor   %xmm1, %xmm0
+; SSE-NEXT:    psubb  %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrlw  $3, %xmm0, %xmm0
+; AVX-NEXT:    vpand   {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX-NEXT:    vpxor   %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsubb  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll
new file mode 100644
index 0000000..3fc377a
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -0,0 +1,767 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+;
+; Variable Shifts
+;
+
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: var_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrq $1, %xmm3, %rcx
+; AVX1-NEXT:    sarq %cl, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm4
+; AVX1-NEXT:    vmovq %xmm2, %rax
+; AVX1-NEXT:    vmovq %xmm3, %rcx
+; AVX1-NEXT:    sarq %cl, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX1-NEXT:    sarq %cl, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm3
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vmovq %xmm1, %rcx
+; AVX1-NEXT:    sarq %cl, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrq $1, %xmm3, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm4
+; AVX2-NEXT:    vmovq %xmm2, %rax
+; AVX2-NEXT:    vmovq %xmm3, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm2
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm3
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vmovq %xmm1, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm0
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <4 x i64> %a, %b
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: var_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrd $1, %xmm2, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrd $1, %xmm3, %ecx
+; AVX1-NEXT:    sarl %cl, %eax
+; AVX1-NEXT:    vmovd %xmm2, %edx
+; AVX1-NEXT:    vmovd %xmm3, %ecx
+; AVX1-NEXT:    sarl %cl, %edx
+; AVX1-NEXT:    vmovd %edx, %xmm4
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm4, %xmm4
+; AVX1-NEXT:    vpextrd $2, %xmm2, %eax
+; AVX1-NEXT:    vpextrd $2, %xmm3, %ecx
+; AVX1-NEXT:    sarl %cl, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm4, %xmm4
+; AVX1-NEXT:    vpextrd $3, %xmm2, %eax
+; AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
+; AVX1-NEXT:    sarl %cl, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm4, %xmm2
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $1, %xmm1, %ecx
+; AVX1-NEXT:    sarl %cl, %eax
+; AVX1-NEXT:    vmovd %xmm0, %edx
+; AVX1-NEXT:    vmovd %xmm1, %ecx
+; AVX1-NEXT:    sarl %cl, %edx
+; AVX1-NEXT:    vmovd %edx, %xmm3
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT:    sarl %cl, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT:    sarl %cl, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <8 x i32> %a, %b
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: var_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllw $12, %xmm2, %xmm3
+; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm2
+; AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpsraw $8, %xmm4, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
+; AVX1-NEXT:    vpsraw $4, %xmm2, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsraw $2, %xmm2, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsraw $1, %xmm2, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $12, %xmm1, %xmm3
+; AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
+; AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm3
+; AVX1-NEXT:    vpsraw $8, %xmm0, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $4, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpsravd %ymm3, %ymm4, %ymm3
+; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <16 x i16> %a, %b
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: var_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllw $5, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8],xmm4[8],xmm0[9],xmm4[9],xmm0[10],xmm4[10],xmm0[11],xmm4[11],xmm0[12],xmm4[12],xmm0[13],xmm4[13],xmm0[14],xmm4[14],xmm0[15],xmm4[15]
+; AVX1-NEXT:    vpsraw $4, %xmm5, %xmm6
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; AVX1-NEXT:    vpsraw $2, %xmm5, %xmm6
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; AVX1-NEXT:    vpsraw $1, %xmm5, %xmm6
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm4 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
+; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpsraw $4, %xmm0, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm4
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm4
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX2-NEXT:    vpsraw $4, %ymm3, %ymm4
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT:    vpsraw $2, %ymm3, %ymm4
+; AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT:    vpsraw $1, %ymm3, %ymm4
+; AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpsraw $4, %ymm0, %ymm3
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm3
+; AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsraw $1, %ymm0, %ymm3
+; AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <32 x i8> %a, %b
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: splatvar_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrq $1, %xmm2, %rdx
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    movb %al, %cl
+; AVX1-NEXT:    sarq %cl, %rdx
+; AVX1-NEXT:    vmovq %rdx, %xmm3
+; AVX1-NEXT:    vmovq %xmm2, %rsi
+; AVX1-NEXT:    vmovq %xmm1, %rdx
+; AVX1-NEXT:    movb %dl, %cl
+; AVX1-NEXT:    sarq %cl, %rsi
+; AVX1-NEXT:    vmovq %rsi, %xmm1
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX1-NEXT:    movb %al, %cl
+; AVX1-NEXT:    sarq %cl, %rsi
+; AVX1-NEXT:    vmovq %rsi, %xmm2
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    movb %dl, %cl
+; AVX1-NEXT:    sarq %cl, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrq $1, %xmm3, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm4
+; AVX2-NEXT:    vmovq %xmm2, %rax
+; AVX2-NEXT:    vmovq %xmm3, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm2
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm3
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vmovq %xmm1, %rcx
+; AVX2-NEXT:    sarq %cl, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm0
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shift = ashr <4 x i64> %a, %splat
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: splatvar_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX2-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+  %shift = ashr <8 x i32> %a, %splat
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: splatvar_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovd %xmm1, %eax
+; AVX1-NEXT:    movzwl %ax, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovd %xmm1, %eax
+; AVX2-NEXT:    movzwl %ax, %eax
+; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+  %shift = ashr <16 x i16> %a, %splat
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: splatvar_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
+; AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm6
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm6, %xmm6, %xmm9
+; AVX1-NEXT:    vpblendvb %xmm9, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm8
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX1-NEXT:    vpsraw $4, %xmm3, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpsraw $2, %xmm3, %xmm5
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpsraw $1, %xmm3, %xmm5
+; AVX1-NEXT:    vpaddw %xmm4, %xmm4, %xmm7
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm8, %xmm3, %xmm8
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpsraw $4, %xmm5, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm3, %xmm5, %xmm2
+; AVX1-NEXT:    vpsraw $2, %xmm2, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsraw $1, %xmm2, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm9, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpsraw $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm8, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX2-NEXT:    vpsraw $4, %ymm3, %ymm4
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT:    vpsraw $2, %ymm3, %ymm4
+; AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT:    vpsraw $1, %ymm3, %ymm4
+; AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpsraw $4, %ymm0, %ymm3
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm3
+; AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsraw $1, %ymm0, %ymm3
+; AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+  %shift = ashr <32 x i8> %a, %splat
+  ret <32 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+; AVX1-LABEL: constant_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    sarq $62, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovq %xmm1, %rax
+; AVX1-NEXT:    sarq $31, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm1
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-NEXT:    sarq $7, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    sarq %rax
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX2-NEXT:    sarq $62, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm2
+; AVX2-NEXT:    vmovq %xmm1, %rax
+; AVX2-NEXT:    sarq $31, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm1
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX2-NEXT:    sarq $7, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm2
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    sarq %rax
+; AVX2-NEXT:    vmovq %rax, %xmm0
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+; AVX1-LABEL: constant_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrd $1, %xmm1, %eax
+; AVX1-NEXT:    sarl $9, %eax
+; AVX1-NEXT:    vmovd %xmm1, %ecx
+; AVX1-NEXT:    sarl $8, %ecx
+; AVX1-NEXT:    vmovd %ecx, %xmm2
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $2, %xmm1, %eax
+; AVX1-NEXT:    sarl $8, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $3, %xmm1, %eax
+; AVX1-NEXT:    sarl $7, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    sarl $5, %eax
+; AVX1-NEXT:    vmovd %xmm0, %ecx
+; AVX1-NEXT:    sarl $4, %ecx
+; AVX1-NEXT:    vmovd %ecx, %xmm2
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    sarl $6, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    sarl $7, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+; AVX1-LABEL: constant_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsraw $8, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32896,37008,41120,45232,49344,53456,57568,61680]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsraw $4, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [256,8480,16704,24928,33152,41376,49600,57824]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsraw $2, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [512,16960,33408,49856,768,17216,33664,50112]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsraw $1, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1024,33920,1280,34176,1536,34432,1792,34688]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsraw $8, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,4112,8224,12336,16448,20560,24672,28784]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $4, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,8224,16448,24672,32896,41120,49344,57568]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,16448,32896,49344,256,16704,33152,49600]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,32896,256,33152,512,33408,768,33664]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpsravd %ymm3, %ymm4, %ymm3
+; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+; AVX1-LABEL: constant_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
+; AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm6
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddw %xmm6, %xmm6, %xmm9
+; AVX1-NEXT:    vpblendvb %xmm9, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm8
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; AVX1-NEXT:    vpsraw $4, %xmm3, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpsraw $2, %xmm3, %xmm5
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpsraw $1, %xmm3, %xmm5
+; AVX1-NEXT:    vpaddw %xmm4, %xmm4, %xmm7
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm8, %xmm3, %xmm8
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpsraw $4, %xmm5, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm3, %xmm5, %xmm2
+; AVX1-NEXT:    vpsraw $2, %xmm2, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsraw $1, %xmm2, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm9, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpsraw $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm8, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX2-NEXT:    vpsraw $4, %ymm3, %ymm4
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT:    vpsraw $2, %ymm3, %ymm4
+; AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT:    vpsraw $1, %ymm3, %ymm4
+; AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpsraw $4, %ymm0, %ymm3
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm3
+; AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsraw $1, %ymm0, %ymm3
+; AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+; AVX1-LABEL: splatconstant_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX1-NEXT:    sarq $7, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovq %xmm1, %rax
+; AVX1-NEXT:    sarq $7, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm1
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-NEXT:    sarq $7, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm2
+; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    sarq $7, %rax
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX2-NEXT:    sarq $7, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm2
+; AVX2-NEXT:    vmovq %xmm1, %rax
+; AVX2-NEXT:    sarq $7, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm1
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX2-NEXT:    sarq $7, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm2
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    sarq $7, %rax
+; AVX2-NEXT:    vmovq %rax, %xmm0
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+; AVX1-LABEL: splatconstant_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsrad $5, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsrad $5, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrad $5, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+; AVX1-LABEL: splatconstant_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsraw $3, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsraw $3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsraw $3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; AVX1-LABEL: splatconstant_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlw  $3, %ymm0, %ymm0
+; AVX2-NEXT:    vpand   {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpxor   %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubb  %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shift-lshr-128.ll b/test/CodeGen/X86/vector-shift-lshr-128.ll
new file mode 100644
index 0000000..f5a7e28
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -0,0 +1,778 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+;
+; Variable Shifts
+;
+
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: var_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psrlq  %xmm3, %xmm2
+; SSE2-NEXT:    psrlq  %xmm1, %xmm0
+; SSE2-NEXT:    movsd  {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE2-NEXT:    movapd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa  %xmm0, %xmm2
+; SSE41-NEXT:    psrlq   %xmm1, %xmm2
+; SSE41-NEXT:    pshufd  {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT:    psrlq   %xmm1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsrlq   %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vpshufd  {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpsrlq   %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = lshr <2 x i64> %a, %b
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: var_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; SSE2-NEXT:    movd      %xmm2, %eax
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT:    movd      %xmm2, %ecx
+; SSE2-NEXT:    shrl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm2
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; SSE2-NEXT:    movd      %xmm3, %eax
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; SSE2-NEXT:    movd      %xmm3, %ecx
+; SSE2-NEXT:    shrl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm3
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    movd      %xmm1, %ecx
+; SSE2-NEXT:    shrl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm2
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movd      %xmm0, %ecx
+; SSE2-NEXT:    shrl      %cl, %eax
+; SSE2-NEXT:    movd      %eax, %xmm0
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT:    movdqa     %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrd $1, %xmm0, %eax
+; SSE41-NEXT:    pextrd $1, %xmm1, %ecx
+; SSE41-NEXT:    shrl   %cl, %eax
+; SSE41-NEXT:    movd   %xmm0, %edx
+; SSE41-NEXT:    movd   %xmm1, %ecx
+; SSE41-NEXT:    shrl   %cl, %edx
+; SSE41-NEXT:    movd   %edx, %xmm2
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm2
+; SSE41-NEXT:    pextrd $2, %xmm0, %eax
+; SSE41-NEXT:    pextrd $2, %xmm1, %ecx
+; SSE41-NEXT:    shrl   %cl, %eax
+; SSE41-NEXT:    pinsrd $2, %eax, %xmm2
+; SSE41-NEXT:    pextrd $3, %xmm0, %eax
+; SSE41-NEXT:    pextrd $3, %xmm1, %ecx
+; SSE41-NEXT:    shrl   %cl, %eax
+; SSE41-NEXT:    pinsrd $3, %eax, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $1, %xmm1, %ecx
+; AVX1-NEXT:    shrl    %cl, %eax
+; AVX1-NEXT:    vmovd   %xmm0, %edx
+; AVX1-NEXT:    vmovd   %xmm1, %ecx
+; AVX1-NEXT:    shrl    %cl, %edx
+; AVX1-NEXT:    vmovd   %edx, %xmm2
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT:    shrl    %cl, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT:    shrl    %cl, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = lshr <4 x i32> %a, %b
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: var_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psllw  $12, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psrlw  $8, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psrlw  $4, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psrlw  $2, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    psraw  $15, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pandn  %xmm0, %xmm2
+; SSE2-NEXT:    psrlw  $1, %xmm0
+; SSE2-NEXT:    pand   %xmm1, %xmm0
+; SSE2-NEXT:    por    %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm2
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    psllw    $12, %xmm0
+; SSE41-NEXT:    psllw    $4, %xmm1
+; SSE41-NEXT:    por      %xmm0, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm3
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm2, %xmm4
+; SSE41-NEXT:    psrlw    $8, %xmm4
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm4, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psrlw    $4, %xmm1
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psrlw    $2, %xmm1
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psrlw    $1, %xmm1
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v8i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsllw    $12, %xmm1, %xmm2
+; AVX1-NEXT:    vpsllw    $4, %xmm1, %xmm1
+; AVX1-NEXT:    vpor      %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpsrlw    $8, %xmm0, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $4, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v8i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpsrlvd   %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb   {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT:    vpermq    {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %shift = lshr <8 x i16> %a, %b
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: var_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:  psllw   $5, %xmm1
+; SSE2-NEXT:  pxor    %xmm2, %xmm2
+; SSE2-NEXT:  pxor    %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
+; SSE2-NEXT:  movdqa  %xmm3, %xmm4
+; SSE2-NEXT:  pandn   %xmm0, %xmm4
+; SSE2-NEXT:  psrlw   $4, %xmm0
+; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand    %xmm3, %xmm0
+; SSE2-NEXT:  por     %xmm4, %xmm0
+; SSE2-NEXT:  paddb   %xmm1, %xmm1
+; SSE2-NEXT:  pxor    %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
+; SSE2-NEXT:  movdqa  %xmm3, %xmm4
+; SSE2-NEXT:  pandn   %xmm0, %xmm4
+; SSE2-NEXT:  psrlw   $2, %xmm0
+; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand    %xmm3, %xmm0
+; SSE2-NEXT:  por     %xmm4, %xmm0
+; SSE2-NEXT:  paddb   %xmm1, %xmm1
+; SSE2-NEXT:  pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT:  movdqa  %xmm2, %xmm1
+; SSE2-NEXT:  pandn   %xmm0, %xmm1
+; SSE2-NEXT:  psrlw   $1, %xmm0
+; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand    %xmm2, %xmm0
+; SSE2-NEXT:  por     %xmm1, %xmm0
+; SSE2-NEXT:  retq
+;
+; SSE41-LABEL: var_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm2
+; SSE41-NEXT:    psllw    $5, %xmm1
+; SSE41-NEXT:    movdqa   %xmm2, %xmm3
+; SSE41-NEXT:    psrlw    $4, %xmm3
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm3
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm3, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm3
+; SSE41-NEXT:    psrlw    $2, %xmm3
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm3
+; SSE41-NEXT:    paddb    %xmm1, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm3, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm3
+; SSE41-NEXT:    psrlw    $1, %xmm3
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm3
+; SSE41-NEXT:    paddb    %xmm1, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm3, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: var_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsllw    $5, %xmm1, %xmm1
+; AVX-NEXT:    vpsrlw    $4, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw    $2, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw    $1, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = lshr <16 x i8> %a, %b
+  ret <16 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE-LABEL: splatvar_shift_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlq %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+  %shift = lshr <2 x i64> %a, %splat
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: splatvar_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm2, %xmm2
+; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSE2-NEXT:    psrld %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; SSE41-NEXT:    psrld %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shift = lshr <4 x i32> %a, %splat
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: splatvar_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd   %xmm1, %eax
+; SSE2-NEXT:    movzwl %ax, %eax
+; SSE2-NEXT:    movd   %eax, %xmm1
+; SSE2-NEXT:    psrlw  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; SSE41-NEXT:    psrlw %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %shift = lshr <8 x i16> %a, %splat
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: splatvar_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:  punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:  pshufd    {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT:  pshuflw   {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:  pshufhw   {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:  psllw     $5, %xmm2
+; SSE2-NEXT:  pxor      %xmm1, %xmm1
+; SSE2-NEXT:  pxor      %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb   %xmm2, %xmm3
+; SSE2-NEXT:  movdqa    %xmm3, %xmm4
+; SSE2-NEXT:  pandn     %xmm0, %xmm4
+; SSE2-NEXT:  psrlw     $4, %xmm0
+; SSE2-NEXT:  pand      {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand      %xmm3, %xmm0
+; SSE2-NEXT:  por       %xmm4, %xmm0
+; SSE2-NEXT:  paddb     %xmm2, %xmm2
+; SSE2-NEXT:  pxor      %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb   %xmm2, %xmm3
+; SSE2-NEXT:  movdqa    %xmm3, %xmm4
+; SSE2-NEXT:  pandn     %xmm0, %xmm4
+; SSE2-NEXT:  psrlw     $2, %xmm0
+; SSE2-NEXT:  pand      {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand      %xmm3, %xmm0
+; SSE2-NEXT:  por       %xmm4, %xmm0
+; SSE2-NEXT:  paddb     %xmm2, %xmm2
+; SSE2-NEXT:  pcmpgtb   %xmm2, %xmm1
+; SSE2-NEXT:  movdqa    %xmm1, %xmm2
+; SSE2-NEXT:  pandn     %xmm0, %xmm2
+; SSE2-NEXT:  psrlw     $1, %xmm0
+; SSE2-NEXT:  pand      {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand      %xmm1, %xmm0
+; SSE2-NEXT:  por       %xmm2, %xmm0
+; SSE2-NEXT:  retq
+;
+; SSE41-LABEL: splatvar_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm2
+; SSE41-NEXT:    pxor     %xmm0, %xmm0
+; SSE41-NEXT:    pshufb   %xmm0, %xmm1
+; SSE41-NEXT:    psllw    $5, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm3
+; SSE41-NEXT:    paddb    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm2, %xmm4
+; SSE41-NEXT:    psrlw    $4, %xmm4
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm4
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm4, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psrlw    $2, %xmm1
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psrlw    $1, %xmm1
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    paddb    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: splatvar_shift_v16i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor     %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb   %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw    $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddb    %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpsrlw    $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpand     {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpand     {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpand     {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpaddb    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v16i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT:    vpsllw       $5, %xmm1, %xmm1
+; AVX2-NEXT:    vpsrlw       $4, %xmm0, %xmm2
+; AVX2-NEXT:    vpand        {{.*}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrlw       $2, %xmm0, %xmm2
+; AVX2-NEXT:    vpand        {{.*}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT:    vpaddb       %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrlw       $1, %xmm0, %xmm2
+; AVX2-NEXT:    vpand        {{.*}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT:    vpaddb       %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %shift = lshr <16 x i8> %a, %splat
+  ret <16 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+; SSE2-LABEL: constant_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrlq  $7, %xmm1
+; SSE2-NEXT:    psrlq  $1, %xmm0
+; SSE2-NEXT:    movsd  {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa  %xmm0, %xmm1
+; SSE41-NEXT:    psrlq   $7, %xmm1
+; SSE41-NEXT:    psrlq   $1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_shift_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsrlq  $7, %xmm0, %xmm1
+; AVX1-NEXT:    vpsrlq  $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = lshr <2 x i64> %a, <i64 1, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+; SSE2-LABEL: constant_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; SSE2-NEXT:    movd      %xmm1, %eax
+; SSE2-NEXT:    shrl      $7, %eax
+; SSE2-NEXT:    movd      %eax, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; SSE2-NEXT:    movd      %xmm2, %eax
+; SSE2-NEXT:    shrl      $5, %eax
+; SSE2-NEXT:    movd      %eax, %xmm2
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    shrl      $4, %eax
+; SSE2-NEXT:    movd      %eax, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd      %xmm0, %eax
+; SSE2-NEXT:    shrl      $6, %eax
+; SSE2-NEXT:    movd      %eax, %xmm0
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT:    movdqa    %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrd $1, %xmm0, %eax
+; SSE41-NEXT:    shrl   $5, %eax
+; SSE41-NEXT:    movd   %xmm0, %ecx
+; SSE41-NEXT:    shrl   $4, %ecx
+; SSE41-NEXT:    movd   %ecx, %xmm1
+; SSE41-NEXT:    pinsrd $1, %eax, %xmm1
+; SSE41-NEXT:    pextrd $2, %xmm0, %eax
+; SSE41-NEXT:    shrl   $6, %eax
+; SSE41-NEXT:    pinsrd $2, %eax, %xmm1
+; SSE41-NEXT:    pextrd $3, %xmm0, %eax
+; SSE41-NEXT:    shrl   $7, %eax
+; SSE41-NEXT:    pinsrd $3, %eax, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_shift_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    shrl    $5, %eax
+; AVX1-NEXT:    vmovd   %xmm0, %ecx
+; AVX1-NEXT:    shrl    $4, %ecx
+; AVX1-NEXT:    vmovd   %ecx, %xmm1
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    shrl    $6, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    shrl    $7, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+; SSE2-LABEL: constant_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa    %xmm0, %xmm1
+; SSE2-NEXT:    psrlw     $4, %xmm1
+; SSE2-NEXT:    movsd     {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm1[0,2,2,3]
+; SSE2-NEXT:    psrlw     $2, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}}  xmm0 = xmm1[1,3,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT:    movdqa    {{.*#+}} xmm0 = [65535,0,65535,0,65535,0,65535,0]
+; SSE2-NEXT:    movdqa    %xmm2, %xmm1
+; SSE2-NEXT:    pand      %xmm0, %xmm1
+; SSE2-NEXT:    psrlw     $1, %xmm2
+; SSE2-NEXT:    pandn     %xmm2, %xmm0
+; SSE2-NEXT:    por       %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psrlw    $8, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,4112,8224,12336,16448,20560,24672,28784]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psrlw    $4, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,8224,16448,24672,32896,41120,49344,57568]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psrlw    $2, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,16448,32896,49344,256,16704,33152,49600]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psrlw    $1, %xmm2
+; SSE41-NEXT:    movaps   {{.*#+}} xmm0 = [0,32896,256,33152,512,33408,768,33664]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_shift_v8i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsrlw    $8, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,4112,8224,12336,16448,20560,24672,28784]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $4, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,8224,16448,24672,32896,41120,49344,57568]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $2, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,16448,32896,49344,256,16704,33152,49600]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw    $1, %xmm0, %xmm1
+; AVX1-NEXT:    vmovdqa   {{.*}}(%rip), %xmm2  # xmm2 = [0,32896,256,33152,512,33408,768,33664]
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v8i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT:    vpsrlvd   %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb   {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT:    vpermq    {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+; SSE2-LABEL: constant_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa  {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; SSE2-NEXT:    psllw   $5, %xmm2
+; SSE2-NEXT:    pxor    %xmm1, %xmm1
+; SSE2-NEXT:    pxor    %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
+; SSE2-NEXT:    movdqa  %xmm3, %xmm4
+; SSE2-NEXT:    pandn   %xmm0, %xmm4
+; SSE2-NEXT:    psrlw   $4, %xmm0
+; SSE2-NEXT:    pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    pand    %xmm3, %xmm0
+; SSE2-NEXT:    por     %xmm4, %xmm0
+; SSE2-NEXT:    paddb   %xmm2, %xmm2
+; SSE2-NEXT:    pxor    %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
+; SSE2-NEXT:    movdqa  %xmm3, %xmm4
+; SSE2-NEXT:    pandn   %xmm0, %xmm4
+; SSE2-NEXT:    psrlw   $2, %xmm0
+; SSE2-NEXT:    pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    pand    %xmm3, %xmm0
+; SSE2-NEXT:    por     %xmm4, %xmm0
+; SSE2-NEXT:    paddb   %xmm2, %xmm2
+; SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
+; SSE2-NEXT:    movdqa  %xmm1, %xmm2
+; SSE2-NEXT:    pandn   %xmm0, %xmm2
+; SSE2-NEXT:    psrlw   $1, %xmm0
+; SSE2-NEXT:    pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    pand    %xmm1, %xmm0
+; SSE2-NEXT:    por     %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm1
+; SSE41-NEXT:    movdqa   {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; SSE41-NEXT:    psllw    $5, %xmm0
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psrlw    $4, %xmm2
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psrlw    $2, %xmm2
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    paddb    %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psrlw    $1, %xmm2
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    paddb    %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: constant_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovdqa   {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX-NEXT:    vpsllw    $5, %xmm1, %xmm1
+; AVX-NEXT:    vpsrlw    $4, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw    $2, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw    $1, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <16 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+; SSE-LABEL: splatconstant_shift_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlq $7, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrlq $7, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+; SSE-LABEL: splatconstant_shift_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrld $5, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrld $5, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+; SSE-LABEL: splatconstant_shift_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlw $3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrlw $3, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+; SSE-LABEL: splatconstant_shift_v16i8:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlw     $3, %xmm0
+; SSE-NEXT:    pand      {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrlw    $3, %xmm0
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm0
+; AVX-NEXT:    retq
+  %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shift-lshr-256.ll b/test/CodeGen/X86/vector-shift-lshr-256.ll
new file mode 100644
index 0000000..d200abd
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -0,0 +1,548 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+;
+; Variable Shifts
+;
+
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: var_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <4 x i64> %a, %b
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: var_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrd $1, %xmm2, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrd $1, %xmm3, %ecx
+; AVX1-NEXT:    shrl %cl, %eax
+; AVX1-NEXT:    vmovd %xmm2, %edx
+; AVX1-NEXT:    vmovd %xmm3, %ecx
+; AVX1-NEXT:    shrl %cl, %edx
+; AVX1-NEXT:    vmovd %edx, %xmm4
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm4, %xmm4
+; AVX1-NEXT:    vpextrd $2, %xmm2, %eax
+; AVX1-NEXT:    vpextrd $2, %xmm3, %ecx
+; AVX1-NEXT:    shrl %cl, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm4, %xmm4
+; AVX1-NEXT:    vpextrd $3, %xmm2, %eax
+; AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
+; AVX1-NEXT:    shrl %cl, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm4, %xmm2
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $1, %xmm1, %ecx
+; AVX1-NEXT:    shrl %cl, %eax
+; AVX1-NEXT:    vmovd %xmm0, %edx
+; AVX1-NEXT:    vmovd %xmm1, %ecx
+; AVX1-NEXT:    shrl %cl, %edx
+; AVX1-NEXT:    vmovd %edx, %xmm3
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm3, %xmm3
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT:    shrl %cl, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT:    shrl %cl, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <8 x i32> %a, %b
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: var_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllw $12, %xmm2, %xmm3
+; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm2
+; AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
+; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $1, %xmm2, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $12, %xmm1, %xmm3
+; AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
+; AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm3
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpsrlvd %ymm3, %ymm4, %ymm3
+; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <16 x i16> %a, %b
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: var_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $1, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllw    $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpsrlw    $4, %ymm0, %ymm2
+; AVX2-NEXT:    vpand     {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw    $2, %ymm0, %ymm2
+; AVX2-NEXT:    vpand     {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb    %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw    $1, %ymm0, %ymm2
+; AVX2-NEXT:    vpand     {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb    %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <32 x i8> %a, %b
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: splatvar_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shift = lshr <4 x i64> %a, %splat
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: splatvar_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+  %shift = lshr <8 x i32> %a, %splat
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: splatvar_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovd %xmm1, %eax
+; AVX1-NEXT:    movzwl %ax, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovd %xmm1, %eax
+; AVX2-NEXT:    movzwl %ax, %eax
+; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+  %shift = lshr <16 x i16> %a, %splat
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: splatvar_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm8, %xmm3, %xmm3
+; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm6
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $1, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm6, %xmm6, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm8, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm7, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $2, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $1, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+  %shift = lshr <32 x i8> %a, %splat
+  ret <32 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+; AVX1-LABEL: constant_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsrlq $62, %xmm1, %xmm2
+; AVX1-NEXT:    vpsrlq $31, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm2
+; AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+; AVX1-LABEL: constant_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrd $1, %xmm1, %eax
+; AVX1-NEXT:    shrl $9, %eax
+; AVX1-NEXT:    vmovd %xmm1, %ecx
+; AVX1-NEXT:    shrl $8, %ecx
+; AVX1-NEXT:    vmovd %ecx, %xmm2
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $2, %xmm1, %eax
+; AVX1-NEXT:    shrl $8, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $3, %xmm1, %eax
+; AVX1-NEXT:    shrl $7, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; AVX1-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX1-NEXT:    shrl $5, %eax
+; AVX1-NEXT:    vmovd %xmm0, %ecx
+; AVX1-NEXT:    shrl $4, %ecx
+; AVX1-NEXT:    vmovd %ecx, %xmm2
+; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX1-NEXT:    shrl $6, %eax
+; AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX1-NEXT:    shrl $7, %eax
+; AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+; AVX1-LABEL: constant_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32896,37008,41120,45232,49344,53456,57568,61680]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [256,8480,16704,24928,33152,41376,49600,57824]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [512,16960,33408,49856,768,17216,33664,50112]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1024,33920,1280,34176,1536,34432,1792,34688]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,4112,8224,12336,16448,20560,24672,28784]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,8224,16448,24672,32896,41120,49344,57568]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,16448,32896,49344,256,16704,33152,49600]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,32896,256,33152,512,33408,768,33664]
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpsrlvd %ymm3, %ymm4, %ymm3
+; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+; AVX1-LABEL: constant_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm8, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX1-NEXT:    vpsllw $5, %xmm4, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddb %xmm4, %xmm4, %xmm6
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX1-NEXT:    vpand %xmm7, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddb %xmm6, %xmm6, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm2
+; AVX1-NEXT:    vpand %xmm8, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT:    vpand %xmm7, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa   {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX2-NEXT:    vpsllw    $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpsrlw    $4, %ymm0, %ymm2
+; AVX2-NEXT:    vpand     {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw    $2, %ymm0, %ymm2
+; AVX2-NEXT:    vpand     {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb    %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw    $1, %ymm0, %ymm2
+; AVX2-NEXT:    vpand     {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb    %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+; AVX1-LABEL: splatconstant_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlq $7, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+; AVX1-LABEL: splatconstant_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsrld $5, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsrld $5, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrld $5, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+; AVX1-LABEL: splatconstant_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; AVX1-LABEL: splatconstant_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsrlw $3, %ymm0
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0
+; AVX2-NEXT:    retq
+  %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shift-shl-128.ll b/test/CodeGen/X86/vector-shift-shl-128.ll
new file mode 100644
index 0000000..3ac31ea
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -0,0 +1,639 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+;
+; Variable Shifts
+;
+
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: var_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    psllq  %xmm3, %xmm2
+; SSE2-NEXT:    psllq  %xmm1, %xmm0
+; SSE2-NEXT:    movsd  {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE2-NEXT:    movapd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa  %xmm0, %xmm2
+; SSE41-NEXT:    psllq   %xmm1, %xmm2
+; SSE41-NEXT:    pshufd  {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT:    psllq   %xmm1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsllq   %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vpshufd  {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpsllq   %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = shl <2 x i64> %a, %b
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: var_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pslld     $23, %xmm1
+; SSE2-NEXT:    paddd     {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE2-NEXT:    pmuludq   %xmm0, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT:    pmuludq   %xmm2, %xmm0
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    movdqa    %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pslld     $23, %xmm1
+; SSE41-NEXT:    paddd     {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
+; SSE41-NEXT:    pmulld    %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpslld     $23, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddd     {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
+; AVX1-NEXT:    vpmulld    %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = shl <4 x i32> %a, %b
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: var_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psllw  $12, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psllw  $8, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psllw  $4, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    psraw  $15, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm3
+; SSE2-NEXT:    pandn  %xmm0, %xmm3
+; SSE2-NEXT:    psllw  $2, %xmm0
+; SSE2-NEXT:    pand   %xmm2, %xmm0
+; SSE2-NEXT:    por    %xmm3, %xmm0
+; SSE2-NEXT:    paddw  %xmm1, %xmm1
+; SSE2-NEXT:    psraw  $15, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pandn  %xmm0, %xmm2
+; SSE2-NEXT:    psllw  $1, %xmm0
+; SSE2-NEXT:    pand   %xmm1, %xmm0
+; SSE2-NEXT:    por    %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: var_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm2
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    psllw    $12, %xmm0
+; SSE41-NEXT:    psllw    $4, %xmm1
+; SSE41-NEXT:    por      %xmm0, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm3
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm2, %xmm4
+; SSE41-NEXT:    psllw    $8, %xmm4
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm4, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psllw    $4, %xmm1
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psllw    $2, %xmm1
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psllw    $1, %xmm1
+; SSE41-NEXT:    paddw    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: var_shift_v8i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsllw    $12, %xmm1, %xmm2
+; AVX1-NEXT:    vpsllw    $4, %xmm1, %xmm1
+; AVX1-NEXT:    vpor      %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpsllw    $8, %xmm0, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw    $4, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw    $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw    $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v8i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpsllvd   %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb   {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT:    vpermq    {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %shift = shl <8 x i16> %a, %b
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: var_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:  psllw   $5, %xmm1
+; SSE2-NEXT:  pxor    %xmm2, %xmm2
+; SSE2-NEXT:  pxor    %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
+; SSE2-NEXT:  movdqa  %xmm3, %xmm4
+; SSE2-NEXT:  pandn   %xmm0, %xmm4
+; SSE2-NEXT:  psllw   $4, %xmm0
+; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand    %xmm3, %xmm0
+; SSE2-NEXT:  por     %xmm4, %xmm0
+; SSE2-NEXT:  paddb   %xmm1, %xmm1
+; SSE2-NEXT:  pxor    %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb %xmm1, %xmm3
+; SSE2-NEXT:  movdqa  %xmm3, %xmm4
+; SSE2-NEXT:  pandn   %xmm0, %xmm4
+; SSE2-NEXT:  psllw   $2, %xmm0
+; SSE2-NEXT:  pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand    %xmm3, %xmm0
+; SSE2-NEXT:  por     %xmm4, %xmm0
+; SSE2-NEXT:  paddb   %xmm1, %xmm1
+; SSE2-NEXT:  pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT:  movdqa  %xmm2, %xmm1
+; SSE2-NEXT:  pandn   %xmm0, %xmm1
+; SSE2-NEXT:  paddb   %xmm0, %xmm0
+; SSE2-NEXT:  pand    %xmm2, %xmm0
+; SSE2-NEXT:  por     %xmm1, %xmm0
+; SSE2-NEXT:  retq
+;
+; SSE41-LABEL: var_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm2
+; SSE41-NEXT:    psllw    $5, %xmm1
+; SSE41-NEXT:    movdqa   %xmm2, %xmm3
+; SSE41-NEXT:    psllw    $4, %xmm3
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm3
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm3, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm3
+; SSE41-NEXT:    psllw    $2, %xmm3
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm3
+; SSE41-NEXT:    paddb    %xmm1, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm3, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm3
+; SSE41-NEXT:    paddb    %xmm3, %xmm3
+; SSE41-NEXT:    paddb    %xmm1, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm3, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: var_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsllw    $5, %xmm1, %xmm1
+; AVX-NEXT:    vpsllw    $4, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpsllw    $2, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpaddb    %xmm0, %xmm0, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = shl <16 x i8> %a, %b
+  ret <16 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE-LABEL: splatvar_shift_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    psllq %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+  %shift = shl <2 x i64> %a, %splat
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: splatvar_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm2, %xmm2
+; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSE2-NEXT:    pslld %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; SSE41-NEXT:    pslld %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpslld %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shift = shl <4 x i32> %a, %splat
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: splatvar_shift_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd   %xmm1, %eax
+; SSE2-NEXT:    movzwl %ax, %eax
+; SSE2-NEXT:    movd   %eax, %xmm1
+; SSE2-NEXT:    psllw  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: splatvar_shift_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm2, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; SSE41-NEXT:    psllw %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: splatvar_shift_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+  %shift = shl <8 x i16> %a, %splat
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: splatvar_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:  punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:  pshufd    {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT:  pshuflw   {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:  pshufhw   {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:  psllw     $5, %xmm2
+; SSE2-NEXT:  pxor      %xmm1, %xmm1
+; SSE2-NEXT:  pxor      %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb   %xmm2, %xmm3
+; SSE2-NEXT:  movdqa    %xmm3, %xmm4
+; SSE2-NEXT:  pandn     %xmm0, %xmm4
+; SSE2-NEXT:  psllw     $4, %xmm0
+; SSE2-NEXT:  pand      {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand      %xmm3, %xmm0
+; SSE2-NEXT:  por       %xmm4, %xmm0
+; SSE2-NEXT:  paddb     %xmm2, %xmm2
+; SSE2-NEXT:  pxor      %xmm3, %xmm3
+; SSE2-NEXT:  pcmpgtb   %xmm2, %xmm3
+; SSE2-NEXT:  movdqa    %xmm3, %xmm4
+; SSE2-NEXT:  pandn     %xmm0, %xmm4
+; SSE2-NEXT:  psllw     $2, %xmm0
+; SSE2-NEXT:  pand      {{.*}}(%rip), %xmm0
+; SSE2-NEXT:  pand      %xmm3, %xmm0
+; SSE2-NEXT:  por       %xmm4, %xmm0
+; SSE2-NEXT:  paddb     %xmm2, %xmm2
+; SSE2-NEXT:  pcmpgtb   %xmm2, %xmm1
+; SSE2-NEXT:  movdqa    %xmm1, %xmm2
+; SSE2-NEXT:  pandn     %xmm0, %xmm2
+; SSE2-NEXT:  paddb     %xmm0, %xmm0
+; SSE2-NEXT:  pand      %xmm1, %xmm0
+; SSE2-NEXT:  por       %xmm2, %xmm0
+; SSE2-NEXT:  retq
+;
+; SSE41-LABEL: splatvar_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm2
+; SSE41-NEXT:    pxor     %xmm0, %xmm0
+; SSE41-NEXT:    pshufb   %xmm0, %xmm1
+; SSE41-NEXT:    psllw    $5, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm3
+; SSE41-NEXT:    paddb    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm2, %xmm4
+; SSE41-NEXT:    psllw    $4, %xmm4
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm4
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    pblendvb %xmm4, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    psllw    $2, %xmm1
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm1
+; SSE41-NEXT:    paddb    %xmm1, %xmm1
+; SSE41-NEXT:    paddb    %xmm3, %xmm3
+; SSE41-NEXT:    movdqa   %xmm3, %xmm0
+; SSE41-NEXT:    pblendvb %xmm1, %xmm2
+; SSE41-NEXT:    movdqa   %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: splatvar_shift_v16i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor     %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb   %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw    $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddb    %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpsllw    $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpand     {{.*}}(%rip), %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw    $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpand     {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddb    %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddb    %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v16i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT:    vpsllw       $5, %xmm1, %xmm1
+; AVX2-NEXT:    vpsllw       $4, %xmm0, %xmm2
+; AVX2-NEXT:    vpand        {{.*}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpsllw       $2, %xmm0, %xmm2
+; AVX2-NEXT:    vpand        {{.*}}(%rip), %xmm2, %xmm2
+; AVX2-NEXT:    vpaddb       %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpaddb       %xmm0, %xmm0, %xmm2
+; AVX2-NEXT:    vpaddb       %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendvb    %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+  %shift = shl <16 x i8> %a, %splat
+  ret <16 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+; SSE2-LABEL: constant_shift_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psllq  $7, %xmm1
+; SSE2-NEXT:    psllq  $1, %xmm0
+; SSE2-NEXT:    movsd  {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa  %xmm0, %xmm1
+; SSE41-NEXT:    psllq   $7, %xmm1
+; SSE41-NEXT:    psllq   $1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_shift_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsllq  $7, %xmm0, %xmm1
+; AVX1-NEXT:    vpsllq  $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = shl <2 x i64> %a, <i64 1, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+; SSE2-LABEL: constant_shift_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa    {{.*#+}} xmm1 = [16,32,64,128]
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT:    pmuludq   %xmm1, %xmm0
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT:    pmuludq   %xmm2, %xmm1
+; SSE2-NEXT:    pshufd    {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_shift_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+; SSE-LABEL: constant_shift_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    pmullw {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: constant_shift_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+; SSE2-LABEL: constant_shift_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa  {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; SSE2-NEXT:    psllw   $5, %xmm2
+; SSE2-NEXT:    pxor    %xmm1, %xmm1
+; SSE2-NEXT:    pxor    %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
+; SSE2-NEXT:    movdqa  %xmm3, %xmm4
+; SSE2-NEXT:    pandn   %xmm0, %xmm4
+; SSE2-NEXT:    psllw   $4, %xmm0
+; SSE2-NEXT:    pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    pand    %xmm3, %xmm0
+; SSE2-NEXT:    por     %xmm4, %xmm0
+; SSE2-NEXT:    paddb   %xmm2, %xmm2
+; SSE2-NEXT:    pxor    %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtb %xmm2, %xmm3
+; SSE2-NEXT:    movdqa  %xmm3, %xmm4
+; SSE2-NEXT:    pandn   %xmm0, %xmm4
+; SSE2-NEXT:    psllw   $2, %xmm0
+; SSE2-NEXT:    pand    {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    pand    %xmm3, %xmm0
+; SSE2-NEXT:    por     %xmm4, %xmm0
+; SSE2-NEXT:    paddb   %xmm2, %xmm2
+; SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
+; SSE2-NEXT:    movdqa  %xmm1, %xmm2
+; SSE2-NEXT:    pandn   %xmm0, %xmm2
+; SSE2-NEXT:    paddb   %xmm0, %xmm0
+; SSE2-NEXT:    pand    %xmm1, %xmm0
+; SSE2-NEXT:    por     %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: constant_shift_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa   %xmm0, %xmm1
+; SSE41-NEXT:    movdqa   {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; SSE41-NEXT:    psllw    $5, %xmm0
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psllw    $4, %xmm2
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    psllw    $2, %xmm2
+; SSE41-NEXT:    pand     {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    paddb    %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm2
+; SSE41-NEXT:    paddb    %xmm2, %xmm2
+; SSE41-NEXT:    paddb    %xmm0, %xmm0
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa   %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: constant_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovdqa   {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX-NEXT:    vpsllw    $5, %xmm1, %xmm1
+; AVX-NEXT:    vpsllw    $4, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpsllw    $2, %xmm0, %xmm2
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpaddb    %xmm0, %xmm0, %xmm2
+; AVX-NEXT:    vpaddb    %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <16 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+; SSE-LABEL: splatconstant_shift_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    psllq $7, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsllq $7, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = shl <2 x i64> %a, <i64 7, i64 7>
+  ret <2 x i64> %shift
+}
+
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+; SSE-LABEL: splatconstant_shift_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    pslld $5, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpslld $5, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %shift
+}
+
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+; SSE-LABEL: splatconstant_shift_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    psllw $3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v8i16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsllw $3, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %shift
+}
+
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+; SSE-LABEL: splatconstant_shift_v16i8:
+; SSE:       # BB#0:
+; SSE-NEXT:    psllw     $3, %xmm0
+; SSE-NEXT:    pand      {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: splatconstant_shift_v16i8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsllw    $3, %xmm0
+; AVX-NEXT:    vpand     {{.*}}(%rip), %xmm0
+; AVX-NEXT:    retq
+  %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shift-shl-256.ll b/test/CodeGen/X86/vector-shift-shl-256.ll
new file mode 100644
index 0000000..7c13c0a
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -0,0 +1,459 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+;
+; Variable Shifts
+;
+
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: var_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <4 x i64> %a, %b
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: var_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
+; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpmulld %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
+; AVX1-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <8 x i32> %a, %b
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: var_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllw $12, %xmm2, %xmm3
+; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm2
+; AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpsllw $8, %xmm4, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
+; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $2, %xmm2, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $1, %xmm2, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $12, %xmm1, %xmm3
+; AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
+; AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm3
+; AVX1-NEXT:    vpsllw $8, %xmm0, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $1, %xmm0, %xmm1
+; AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
+; AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <16 x i16> %a, %b
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: var_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
+; AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm3
+; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: var_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <32 x i8> %a, %b
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: splatvar_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+  %shift = shl <4 x i64> %a, %splat
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: splatvar_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpblendw $3, %xmm1, %xmm2, %xmm1 # xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+  %shift = shl <8 x i32> %a, %splat
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: splatvar_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovd %xmm1, %eax
+; AVX1-NEXT:    movzwl %ax, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovd %xmm1, %eax
+; AVX2-NEXT:    movzwl %ax, %eax
+; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+  %shift = shl <16 x i16> %a, %splat
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: splatvar_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm6
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
+; AVX1-NEXT:    vpaddb %xmm6, %xmm6, %xmm7
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
+; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm1
+; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatvar_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+  %shift = shl <32 x i8> %a, %splat
+  ret <32 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+; AVX1-LABEL: constant_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsllq $62, %xmm1, %xmm2
+; AVX1-NEXT:    vpsllq $31, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm2
+; AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+; AVX1-LABEL: constant_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+; AVX1-LABEL: constant_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmullw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+; AVX1-LABEL: constant_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsllw $4, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*}}(%rip), %xmm4  # xmm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX1-NEXT:    vpsllw $5, %xmm4, %xmm4
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw $2, %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddb %xmm4, %xmm4, %xmm6
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vpaddb %xmm6, %xmm6, %xmm7
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw $4, %xmm0, %xmm2
+; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm4, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $2, %xmm0, %xmm2
+; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm6, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm2
+; AVX1-NEXT:    vpblendvb %xmm7, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
+; AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+  ret <32 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+; AVX1-LABEL: splatconstant_shift_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllq $7, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+  ret <4 x i64> %shift
+}
+
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+; AVX1-LABEL: splatconstant_shift_v8i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpslld $5, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpslld $5, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v8i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpslld $5, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  ret <8 x i32> %shift
+}
+
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+; AVX1-LABEL: splatconstant_shift_v16i16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpsllw $3, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v16i16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <16 x i16> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; AVX1-LABEL: splatconstant_shift_v32i8:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsllw $3, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splatconstant_shift_v32i8:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
+; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <32 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 53d13c8..124d6e8 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -653,28 +653,28 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
 define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
 ; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    shll   $8, %edi
-; SSE2-NEXT:    pxor   %xmm0, %xmm0
+; SSE2-NEXT:    shll $8, %edi
+; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pinsrw $2, %edi, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shll   $8, %edi
-; SSSE3-NEXT:    pxor   %xmm0, %xmm0
+; SSSE3-NEXT:    shll $8, %edi
+; SSSE3-NEXT:    pxor %xmm0, %xmm0
 ; SSSE3-NEXT:    pinsrw $2, %edi, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor   %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm0, %xmm0
 ; SSE41-NEXT:    pinsrb $5, %edi, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor   %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $5, %edi, %xmm0
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %a = insertelement <16 x i8> undef, i8 %i, i32 0
   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -684,28 +684,28 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
 define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
 ; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    shll   $8, %edi
-; SSE2-NEXT:    pxor   %xmm0, %xmm0
+; SSE2-NEXT:    shll $8, %edi
+; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pinsrw $7, %edi, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shll   $8, %edi
-; SSSE3-NEXT:    pxor   %xmm0, %xmm0
+; SSSE3-NEXT:    shll $8, %edi
+; SSSE3-NEXT:    pxor %xmm0, %xmm0
 ; SSSE3-NEXT:    pinsrw $7, %edi, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor   %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm0, %xmm0
 ; SSE41-NEXT:    pinsrb $15, %edi, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor   %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $15, %edi, %xmm0
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrb $15, %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %a = insertelement <16 x i8> undef, i8 %i, i32 0
   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
@@ -716,27 +716,27 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
 ; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movzbl %dil, %eax
-; SSE2-NEXT:    pxor   %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; SSSE3:       # BB#0:
 ; SSSE3-NEXT:    movzbl %dil, %eax
-; SSSE3-NEXT:    pxor   %xmm0, %xmm0
+; SSSE3-NEXT:    pxor %xmm0, %xmm0
 ; SSSE3-NEXT:    pinsrw $1, %eax, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor   %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm0, %xmm0
 ; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor   %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $2, %edi, %xmm0
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %a = insertelement <16 x i8> undef, i8 %i, i32 3
   %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1341,12 +1341,12 @@ define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(
 define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
 ; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
 ; SSE:       # BB#0:
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
 ; AVX:       # BB#0:
-; AVX-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; AVX-NEXT:    retq
   %shuffle8  = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
   %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 4007f0b..6a29d33 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1384,14 +1384,14 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor   %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm0, %xmm0
 ; SSE-NEXT:    pinsrw $1, %edi, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor   %xmm0, %xmm0
-; AVX-NEXT:    vpinsrw $1, %edi, %xmm0
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
@@ -1401,14 +1401,14 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor   %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm0, %xmm0
 ; SSE-NEXT:    pinsrw $5, %edi, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor   %xmm0, %xmm0
-; AVX-NEXT:    vpinsrw $5, %edi, %xmm0
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
@@ -1418,14 +1418,14 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor   %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm0, %xmm0
 ; SSE-NEXT:    pinsrw $7, %edi, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor   %xmm0, %xmm0
-; AVX-NEXT:    vpinsrw $7, %edi, %xmm0
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
@@ -1435,14 +1435,14 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pxor   %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm0, %xmm0
 ; SSE-NEXT:    pinsrw $2, %edi, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor   %xmm0, %xmm0
-; AVX-NEXT:    vpinsrw $2, %edi, %xmm0
+; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpinsrw $2, %edi, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 3
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 944ec4b..62bf288 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -810,30 +810,20 @@ define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
 }
 
 define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
-; AVX1-LABEL: insert_reg_and_zero_v4i64:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovq %rdi, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: insert_reg_and_zero_v4i64:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vmovq %rdi, %xmm0
-; AVX2-NEXT:    retq
+; ALL-LABEL: insert_reg_and_zero_v4i64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovq %rdi, %xmm0
+; ALL-NEXT:    retq
   %v = insertelement <4 x i64> undef, i64 %a, i64 0
   %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x i64> %shuffle
 }
 
 define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
-; AVX1-LABEL: insert_mem_and_zero_v4i64:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: insert_mem_and_zero_v4i64:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX2-NEXT:    retq
+; ALL-LABEL: insert_mem_and_zero_v4i64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; ALL-NEXT:    retq
   %a = load i64, i64* %ptr
   %v = insertelement <4 x i64> undef, i64 %a, i64 0
   %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -874,15 +864,10 @@ define <4 x double> @splat_mem_v4f64(double* %ptr) {
 }
 
 define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
-; AVX1-LABEL: splat_mem_v4i64:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: splat_mem_v4i64:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT:    retq
+; ALL-LABEL: splat_mem_v4i64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT:    retq
   %a = load i64, i64* %ptr
   %v = insertelement <4 x i64> undef, i64 %a, i64 0
   %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -915,6 +900,60 @@ define <4 x double> @splat_v4f64(<2 x double> %r) {
   ret <4 x double> %1
 }
 
+define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
+; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT:    retq
+  %v = load <2 x i64>, <2 x i64>* %ptr
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
+; AVX1-LABEL: splat_mem_v4f64_from_v2f64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splat_mem_v4f64_from_v2f64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT:    retq
+  %v = load <2 x double>, <2 x double>* %ptr
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
+; ALL-LABEL: splat128_mem_v4i64_from_v2i64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovaps (%rdi), %xmm0
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    retq
+  %v = load <2 x i64>, <2 x i64>* %ptr
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
+; ALL-LABEL: splat128_mem_v4f64_from_v2f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovaps (%rdi), %xmm0
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    retq
+  %v = load <2 x double>, <2 x double>* %ptr
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  ret <4 x double> %shuffle
+}
+
 define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
 ; AVX1-LABEL: bitcast_v4f64_0426:
 ; AVX1:       # BB#0:
@@ -923,7 +962,7 @@ define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
 ;
 ; AVX2-LABEL: bitcast_v4f64_0426:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpunpcklqdq  {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; AVX2-NEXT:    retq
   %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
   %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
index bb07077..bc72e0a 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -2088,15 +2088,10 @@ entry:
 }
 
 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
-; AVX1-LABEL: insert_mem_and_zero_v8i32:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: insert_mem_and_zero_v8i32:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT:    retq
+; ALL-LABEL: insert_mem_and_zero_v8i32:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ALL-NEXT:    retq
   %a = load i32, i32* %ptr
   %v = insertelement <8 x i32> undef, i32 %a, i32 0
   %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 2c6c8a3..62d4af7 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -15,8 +15,9 @@ define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00000010:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
   ret <8 x double> %shuffle
@@ -25,8 +26,9 @@ define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00000200:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
   ret <8 x double> %shuffle
@@ -35,8 +37,9 @@ define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00003000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
   ret <8 x double> %shuffle
@@ -45,8 +48,11 @@ define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00040000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
   ret <8 x double> %shuffle
@@ -55,8 +61,11 @@ define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00500000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,0]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x double> %shuffle
@@ -65,8 +74,11 @@ define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_06000000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,0,0]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x double> %shuffle
@@ -75,11 +87,11 @@ define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_70000000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
-; ALL-NEXT:    movl $7, %eax
-; ALL-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm2
-; ALL-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,0,0,0]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x double> %shuffle
@@ -88,7 +100,10 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_01014545:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vshuff64x2 $160, %zmm0, %zmm0, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
   ret <8 x double> %shuffle
@@ -97,8 +112,9 @@ define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00112233:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,1,1]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
   ret <8 x double> %shuffle
@@ -107,8 +123,9 @@ define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00001111:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
   ret <8 x double> %shuffle
@@ -117,7 +134,11 @@ define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_81a3c5e7:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vshufpd $170, %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
   ret <8 x double> %shuffle
@@ -126,9 +147,10 @@ define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_08080808:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
   ret <8 x double> %shuffle
@@ -137,9 +159,15 @@ define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_08084c4c:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3]
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
   ret <8 x double> %shuffle
@@ -148,9 +176,13 @@ define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_8823cc67:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -159,9 +191,13 @@ define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_9832dc76:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
   ret <8 x double> %shuffle
@@ -170,9 +206,13 @@ define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_9810dc54:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm2
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
   ret <8 x double> %shuffle
@@ -181,9 +221,15 @@ define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_08194c5d:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   ret <8 x double> %shuffle
@@ -192,9 +238,15 @@ define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_2a3b6e7f:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   ret <8 x double> %shuffle
@@ -203,9 +255,13 @@ define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_08192a3b:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x double> %shuffle
@@ -214,9 +270,11 @@ define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_08991abb:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm0[0],ymm2[1,2,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
   ret <8 x double> %shuffle
@@ -225,9 +283,12 @@ define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_091b2d3f:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   ret <8 x double> %shuffle
@@ -236,9 +297,11 @@ define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_09ab1def:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   ret <8 x double> %shuffle
@@ -247,7 +310,10 @@ define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00014445:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermpd $64, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   ret <8 x double> %shuffle
@@ -256,7 +322,10 @@ define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00204464:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermpd $32, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   ret <8 x double> %shuffle
@@ -265,7 +334,10 @@ define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_03004744:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermpd $12, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -274,7 +346,10 @@ define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_10005444:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermpd $1, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -283,7 +358,10 @@ define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_22006644:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermpd $10, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -292,7 +370,10 @@ define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_33307774:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermpd $63, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   ret <8 x double> %shuffle
@@ -301,7 +382,10 @@ define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_32107654:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermpd $27, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   ret <8 x double> %shuffle
@@ -310,7 +394,10 @@ define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00234467:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $136, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -319,7 +406,10 @@ define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00224466:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $0, %zmm0, %zmm0
+; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   ret <8 x double> %shuffle
@@ -328,7 +418,10 @@ define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_10325476:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $85, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x double> %shuffle
@@ -337,7 +430,10 @@ define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_11335577:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $255, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   ret <8 x double> %shuffle
@@ -346,7 +442,10 @@ define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_10235467:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $153, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -355,7 +454,10 @@ define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_10225466:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $17, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   ret <8 x double> %shuffle
@@ -364,8 +466,10 @@ define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00015444:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -374,8 +478,10 @@ define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00204644:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -384,8 +490,10 @@ define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_03004474:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
   ret <8 x double> %shuffle
@@ -394,8 +502,10 @@ define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_10004444:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -404,8 +514,10 @@ define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_22006446:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,0,0,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
   ret <8 x double> %shuffle
@@ -414,8 +526,10 @@ define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_33307474:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,3,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
   ret <8 x double> %shuffle
@@ -424,8 +538,9 @@ define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_32104567:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -434,8 +549,10 @@ define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00236744:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -444,8 +561,10 @@ define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00226644:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -454,7 +573,9 @@ define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_10324567:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $165, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -463,7 +584,9 @@ define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_11334567:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $175, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -472,7 +595,9 @@ define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_01235467:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $154, %zmm0, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -481,7 +606,9 @@ define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_01235466:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $26, %zmm0, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
   ret <8 x double> %shuffle
@@ -490,8 +617,10 @@ define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_002u6u44:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -500,8 +629,10 @@ define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_00uu66uu:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   ret <8 x double> %shuffle
@@ -510,7 +641,9 @@ define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_103245uu:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $37, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   ret <8 x double> %shuffle
@@ -519,7 +652,9 @@ define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_1133uu67:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $143, %zmm0, %zmm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   ret <8 x double> %shuffle
@@ -528,7 +663,9 @@ define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_0uu354uu:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $24, %zmm0, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   ret <8 x double> %shuffle
@@ -537,7 +674,9 @@ define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_uuu3uu66:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $8, %zmm0, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   ret <8 x double> %shuffle
@@ -546,9 +685,16 @@ define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_c348cda0:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm2[0,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm4
+; ALL-NEXT:    vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2],ymm3[3]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
   ret <8 x double> %shuffle
@@ -557,9 +703,17 @@ define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_f511235a:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vblendpd {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[2,3,1,3]
+; ALL-NEXT:    vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
   ret <8 x double> %shuffle
@@ -577,8 +731,9 @@ define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00000010:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
   ret <8 x i64> %shuffle
@@ -587,8 +742,9 @@ define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00000200:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
   ret <8 x i64> %shuffle
@@ -597,8 +753,9 @@ define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00003000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
   ret <8 x i64> %shuffle
@@ -607,8 +764,11 @@ define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00040000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i64> %shuffle
@@ -617,8 +777,11 @@ define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00500000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,0]
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i64> %shuffle
@@ -627,8 +790,11 @@ define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_06000000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,0,0]
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i64> %shuffle
@@ -637,11 +803,11 @@ define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_70000000:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
-; ALL-NEXT:    movl $7, %eax
-; ALL-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm2
-; ALL-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,0,0,0]
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i64> %shuffle
@@ -650,7 +816,10 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_01014545:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vshufi64x2 $160, %zmm0, %zmm0, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
   ret <8 x i64> %shuffle
@@ -659,8 +828,9 @@ define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00112233:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,1,1]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,3,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
   ret <8 x i64> %shuffle
@@ -669,8 +839,9 @@ define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00001111:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
   ret <8 x i64> %shuffle
@@ -679,7 +850,11 @@ define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_81a3c5e7:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vshufpd $170, %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
   ret <8 x i64> %shuffle
@@ -688,9 +863,10 @@ define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_08080808:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
   ret <8 x i64> %shuffle
@@ -699,9 +875,15 @@ define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_08084c4c:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm2, %ymm2
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5],ymm3[6,7]
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
   ret <8 x i64> %shuffle
@@ -710,9 +892,13 @@ define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_8823cc67:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -721,9 +907,13 @@ define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_9832dc76:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
   ret <8 x i64> %shuffle
@@ -732,9 +922,13 @@ define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_9810dc54:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm2
+; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
   ret <8 x i64> %shuffle
@@ -743,9 +937,15 @@ define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_08194c5d:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   ret <8 x i64> %shuffle
@@ -754,9 +954,15 @@ define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_2a3b6e7f:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   ret <8 x i64> %shuffle
@@ -765,9 +971,13 @@ define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_08192a3b:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i64> %shuffle
@@ -776,9 +986,11 @@ define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_08991abb:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,0,1,1]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3,4,5,6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,3,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
   ret <8 x i64> %shuffle
@@ -787,9 +999,12 @@ define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_091b2d3f:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm1, %zmm0, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   ret <8 x i64> %shuffle
@@ -798,9 +1013,11 @@ define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_09ab1def:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   ret <8 x i64> %shuffle
@@ -809,7 +1026,10 @@ define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00014445:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermq $64, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   ret <8 x i64> %shuffle
@@ -818,7 +1038,10 @@ define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00204464:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermq $32, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   ret <8 x i64> %shuffle
@@ -827,7 +1050,10 @@ define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_03004744:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermq $12, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -836,7 +1062,10 @@ define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_10005444:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermq $1, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -845,7 +1074,10 @@ define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_22006644:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermq $10, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -854,7 +1086,10 @@ define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_33307774:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermq $63, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   ret <8 x i64> %shuffle
@@ -863,7 +1098,10 @@ define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_32107654:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermq $27, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   ret <8 x i64> %shuffle
@@ -872,7 +1110,10 @@ define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00234467:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $136, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -881,7 +1122,10 @@ define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00224466:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $0, %zmm0, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   ret <8 x i64> %shuffle
@@ -890,7 +1134,10 @@ define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_10325476:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $85, %zmm0, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x i64> %shuffle
@@ -899,7 +1146,10 @@ define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_11335577:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $255, %zmm0, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   ret <8 x i64> %shuffle
@@ -908,7 +1158,10 @@ define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_10235467:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $153, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -917,7 +1170,10 @@ define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_10225466:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $17, %zmm0, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,2]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   ret <8 x i64> %shuffle
@@ -926,8 +1182,10 @@ define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00015444:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -936,8 +1194,10 @@ define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00204644:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -946,8 +1206,10 @@ define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_03004474:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,3,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
   ret <8 x i64> %shuffle
@@ -956,8 +1218,10 @@ define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_10004444:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -966,8 +1230,10 @@ define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_22006446:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,0,0,2]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
   ret <8 x i64> %shuffle
@@ -976,8 +1242,10 @@ define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_33307474:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,3,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
   ret <8 x i64> %shuffle
@@ -986,8 +1254,9 @@ define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_32104567:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -996,8 +1265,10 @@ define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00236744:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -1006,8 +1277,10 @@ define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00226644:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -1016,7 +1289,9 @@ define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_10324567:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $165, %zmm0, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -1025,7 +1300,9 @@ define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_11334567:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $175, %zmm0, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -1034,7 +1311,9 @@ define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_01235467:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $154, %zmm0, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -1043,7 +1322,9 @@ define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_01235466:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $26, %zmm0, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,2]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
   ret <8 x i64> %shuffle
@@ -1052,8 +1333,10 @@ define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_002u6u44:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -1062,8 +1345,10 @@ define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_00uu66uu:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm1
-; ALL-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   ret <8 x i64> %shuffle
@@ -1072,7 +1357,9 @@ define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_103245uu:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $37, %zmm0, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   ret <8 x i64> %shuffle
@@ -1081,7 +1368,9 @@ define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_1133uu67:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $143, %zmm0, %zmm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -1090,7 +1379,9 @@ define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_0uu354uu:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $24, %zmm0, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   ret <8 x i64> %shuffle
@@ -1099,7 +1390,9 @@ define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_uuu3uu66:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd $8, %zmm0, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   ret <8 x i64> %shuffle
@@ -1108,9 +1401,15 @@ define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
 ; ALL-LABEL: shuffle_v8i64_6caa87e5:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vmovdqa64 {{.*}}(%rip), %zmm2
-; ALL-NEXT:    vpermt2q %zmm0, %zmm1, %zmm2
-; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT:    vpblendd {{.*#+}} ymm3 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2,3],ymm3[4,5],ymm0[6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
   ret <8 x i64> %shuffle
diff --git a/test/CodeGen/X86/widen_conv-3.ll b/test/CodeGen/X86/widen_conv-3.ll
index a2f3d7b..0a6eea0 100644
--- a/test/CodeGen/X86/widen_conv-3.ll
+++ b/test/CodeGen/X86/widen_conv-3.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: cvtsi2ss
+; CHECK: cvtdq2ps
 
 ; sign to float v2i16 to v2f32
 
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
index 9718c86..a0b552d 100644
--- a/test/CodeGen/X86/win64_params.ll
+++ b/test/CodeGen/X86/win64_params.ll
@@ -7,8 +7,7 @@ define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind re
 entry:
 ; CHECK: movl    48(%rsp), %eax
 ; CHECK: addl    40(%rsp), %eax
-; LINUX: addl    %r9d, %r8d
-; LINUX: movl    %r8d, %eax
+; LINUX: leal    (%r8,%r9), %eax
   %add = add nsw i32 %p6, %p5
   ret i32 %add
 }
@@ -27,10 +26,8 @@ entry:
 ; on other platforms here (note the x86_64_sysvcc calling convention).
 define x86_64_sysvcc i32 @f8(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
 entry:
-; CHECK: addl    %r9d, %r8d
-; CHECK: movl    %r8d, %eax
-; LINUX: addl    %r9d, %r8d
-; LINUX: movl    %r8d, %eax
+; CHECK: leal    (%r8,%r9), %eax
+; LINUX: leal    (%r8,%r9), %eax
   %add = add nsw i32 %p6, %p5
   ret i32 %add
 }
diff --git a/test/CodeGen/X86/win_cst_pool.ll b/test/CodeGen/X86/win_cst_pool.ll
index 199557d..77c37b4 100644
--- a/test/CodeGen/X86/win_cst_pool.ll
+++ b/test/CodeGen/X86/win_cst_pool.ll
@@ -64,3 +64,16 @@ define <4 x float> @undef1() {
 ; CHECK:               movaps  __xmm@00000000000000003f8000003f800000(%rip), %xmm0
 ; CHECK-NEXT:          ret
 }
+
+define float @pr23966(i32 %a) {
+  %tobool = icmp ne i32 %a, 0
+  %sel = select i1 %tobool, float -1.000000e+00, float 1.000000e+00
+  ret float %sel
+}
+
+; CHECK:              .globl  __real@bf8000003f800000
+; CHECK-NEXT:         .section        .rdata,"dr",discard,__real@bf8000003f800000
+; CHECK-NEXT:         .align  4
+; CHECK-NEXT: __real@bf8000003f800000:
+; CHECK-NEXT:         .long   1065353216
+; CHECK-NEXT:         .long   3212836864
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
index 1459124..dfa6e3a 100644
--- a/test/CodeGen/X86/win_ftol2.ll
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -142,3 +142,25 @@ define i64 @double_ui64_5(double %X) {
   %tmp.1 = fptoui double %X to i64
   ret i64 %tmp.1
 }
+
+define double @pr23957_32(double %A) {
+; FTOL-LABEL: @pr23957_32
+; FTOL: fldl
+; FTOL-NEXT: fld %st(0)
+; FTOL-NEXT: calll __ftol2
+  %B = fptoui double %A to i32
+  %C = uitofp i32 %B to double
+  %D = fsub double %C, %A
+  ret double %D
+}
+
+define double @pr23957_64(double %A) {
+; FTOL-LABEL: @pr23957_64
+; FTOL: fldl
+; FTOL-NEXT: fld %st(0)
+; FTOL-NEXT: calll __ftol2
+  %B = fptoui double %A to i64
+  %C = uitofp i64 %B to double
+  %D = fsub double %C, %A
+  ret double %D
+}
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index 829be41..f78fe27 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86            -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 | FileCheck %s -check-prefix=X64
 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 | FileCheck %s -check-prefix=X64
 
@@ -193,3 +193,22 @@ define i32 @test11(i32 %b) {
 ; X32: movl    $-2, %[[REG:.*]]
 ; X32: roll    %{{.*}}, %[[REG]]
 }
+
+%struct.ref_s = type { %union.v, i16, i16 }
+%union.v = type { i64 }
+
+define %struct.ref_s* @test12(%struct.ref_s* %op, i64 %osbot, i64 %intval) {
+  %neg = shl i64 %intval, 32
+  %sext = xor i64 %neg, -4294967296
+  %idx.ext = ashr exact i64 %sext, 32
+  %add.ptr = getelementptr inbounds %struct.ref_s, %struct.ref_s* %op, i64 %idx.ext
+  ret %struct.ref_s* %add.ptr
+; X64-LABEL: test12:
+; X64: shlq	$32, %[[REG:.*]]
+; X64-NOT: not
+; X64: sarq	$28, %[[REG]]
+; X32-LABEL: test12:
+; X32: leal
+; X32-NOT: not
+; X32: shll	$2, %eax
+}
diff --git a/test/DebugInfo/AArch64/bitfields.ll b/test/DebugInfo/AArch64/bitfields.ll
new file mode 100644
index 0000000..5f0caab
--- /dev/null
+++ b/test/DebugInfo/AArch64/bitfields.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple aarch64_be-gnu-linux -O0 -filetype=obj -o %t_be.o %s
+; RUN: llvm-dwarfdump -debug-dump=info %t_be.o | FileCheck %s
+
+; Produced at -O0 from:
+; struct bitfield {
+;   int a : 2;
+;   int b : 32;
+;   int c : 1;
+;   int d : 28;
+; };
+; struct bitfield b;
+
+; Note that DWARF 2 counts bit offsets backwards from the high end of
+; the storage unit to the high end of the bit field.
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"a"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_byte_size  {{.*}} (0x04)
+; CHECK-NEXT: DW_AT_bit_size   {{.*}} (0x02)
+; CHECK-NEXT: DW_AT_bit_offset {{.*}} (0x00)
+; CHECK-NEXT: DW_AT_data_member_location {{.*}} 00
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"b"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_data_member_location {{.*}} 04
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"c"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_byte_size  {{.*}} (0x04)
+; CHECK-NEXT: DW_AT_bit_size   {{.*}} (0x01)
+; CHECK-NEXT: DW_AT_bit_offset {{.*}} (0x00)
+; CHECK-NEXT: DW_AT_data_member_location {{.*}} 08
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"d"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_byte_size  {{.*}} (0x04)
+; CHECK-NEXT: DW_AT_bit_size   {{.*}} (0x1c)
+; CHECK-NEXT: DW_AT_bit_offset {{.*}} (0x01)
+; CHECK-NEXT: DW_AT_data_member_location {{.*}} 08
+
+; ModuleID = 'bitfields.c'
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64_be--linux-gnu"
+
+%struct.bitfield = type <{ i8, [3 x i8], i64 }>
+
+@b = common global %struct.bitfield zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "bitfields.c", directory: "/")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "b", scope: !0, file: !5, line: 8, type: !6, isLocal: false, isDefinition: true, variable: %struct.bitfield* @b)
+!5 = !DIFile(filename: "bitfields.c", directory: "/")
+!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "bitfield", file: !5, line: 1, size: 96, align: 32, elements: !7)
+!7 = !{!8, !10, !11, !12}
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !6, file: !5, line: 2, baseType: !9, size: 2, align: 32)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !6, file: !5, line: 3, baseType: !9, size: 32, align: 32, offset: 32)
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !6, file: !5, line: 4, baseType: !9, size: 1, align: 32, offset: 64)
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !6, file: !5, line: 5, baseType: !9, size: 28, align: 32, offset: 65)
+!13 = !{i32 2, !"Dwarf Version", i32 2}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{i32 1, !"PIC Level", i32 2}
+!16 = !{!"clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)"}
diff --git a/test/DebugInfo/ARM/bitfield.ll b/test/DebugInfo/ARM/bitfield.ll
new file mode 100644
index 0000000..9b41e4e
--- /dev/null
+++ b/test/DebugInfo/ARM/bitfield.ll
@@ -0,0 +1,45 @@
+; RUN: %llc_dwarf -O0 -filetype=obj -o %t.o %s
+; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+; REQUIRES: object-emission
+;
+; Generated from:
+; struct {
+;   char c;
+;   int : 4;
+;   int reserved : 28;
+; } a;
+;
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "reserved"
+; CHECK:          DW_AT_byte_size  {{.*}} (0x04)
+; CHECK:          DW_AT_bit_size   {{.*}} (0x1c)
+; CHECK:          DW_AT_bit_offset {{.*}} (0x18)
+; CHECK:          DW_AT_data_member_location {{.*}}00
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+%struct.anon = type { i8, [5 x i8] }
+
+@a = common global %struct.anon zeroinitializer, align 1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12, !13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "test.i", directory: "/")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "a", scope: !0, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, variable: %struct.anon* @a)
+!5 = !DICompositeType(tag: DW_TAG_structure_type, file: !1, line: 1, size: 48, align: 8, elements: !6)
+!6 = !{!7, !9}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !5, file: !1, line: 2, baseType: !8, size: 8, align: 8)
+!8 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", scope: !5, file: !1, line: 4, baseType: !10, size: 28, align: 32, offset: 12)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !{i32 2, !"Dwarf Version", i32 2}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{i32 1, !"wchar_size", i32 4}
+!14 = !{i32 1, !"min_enum_size", i32 4}
+!15 = !{i32 1, !"PIC Level", i32 2}
+!16 = !{!"clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)"}
diff --git a/test/DebugInfo/X86/DIModule.ll b/test/DebugInfo/X86/DIModule.ll
new file mode 100644
index 0000000..daed43a
--- /dev/null
+++ b/test/DebugInfo/X86/DIModule.ll
@@ -0,0 +1,25 @@
+; ModuleID = '/Volumes/Data/apple-internal/llvm/tools/clang/test/Modules/debug-info-moduleimport.m'
+; RUN: llc %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: DW_TAG_module
+; CHECK-NEXT: DW_AT_name {{.*}}"DebugModule"
+; CHECK-NEXT: DW_AT_LLVM_config_macros {{.*}}"-DMODULES=0"
+; CHECK-NEXT: DW_AT_LLVM_include_path {{.*}}"/llvm/tools/clang/test/Modules/Inputs"
+; CHECK-NEXT: DW_AT_LLVM_isysroot {{.*}}"/"
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, file: !1, producer: "LLVM version 3.7.0", isOptimized: false, runtimeVersion: 2, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !3)
+!1 = !DIFile(filename: "/llvm/tools/clang/test/Modules/<stdin>", directory: "/")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !0, entity: !5, line: 5)
+!5 = !DIModule(scope: null, name: "DebugModule", configMacros: "-DMODULES=0", includePath: "/llvm/tools/clang/test/Modules/Inputs", isysroot: "/")
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"LLVM version 3.7.0"}
diff --git a/test/DebugInfo/X86/asm-macro-line-number.s b/test/DebugInfo/X86/asm-macro-line-number.s
index 0f51dbb..8b0843d 100644
--- a/test/DebugInfo/X86/asm-macro-line-number.s
+++ b/test/DebugInfo/X86/asm-macro-line-number.s
@@ -3,12 +3,18 @@
 # 1 "reduced.S"
 # 1 "<built-in>" 1
 # 1 "reduced.S" 2
+# 200 "macros.h"
 
  .macro return arg
   movl %eax, \arg
   retl
  .endm
 
+ .macro return2 arg
+  return \arg
+ .endm
+
+# 7 "reduced.S"
 function:
  return 0
 
@@ -18,3 +24,11 @@ function:
 # CHECK: .loc 2 8 0
 # CHECK: retl
 
+# 42 "reduced.S"
+function2:
+ return2 0
+
+# CHECK: .loc 2 43 0
+# CHECK: movl %eax, 0
+# CHECK: .loc 2 43 0
+# CHECK: retl
diff --git a/test/DebugInfo/X86/bitfields.ll b/test/DebugInfo/X86/bitfields.ll
new file mode 100644
index 0000000..e895fd6
--- /dev/null
+++ b/test/DebugInfo/X86/bitfields.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple x86_64-apple-macosx -O0 -filetype=obj -o %t_le.o %s
+; RUN: llvm-dwarfdump -debug-dump=info %t_le.o | FileCheck %s
+
+; Produced at -O0 from:
+; struct bitfield {
+;   int a : 2;
+;   int b : 32;
+;   int c : 1;
+;   int d : 28;
+; };
+; struct bitfield b;
+
+; Note that DWARF 2 counts bit offsets backwards from the high end of
+; the storage unit to the high end of the bit field.
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"a"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_byte_size  {{.*}} (0x04)
+; CHECK-NEXT: DW_AT_bit_size   {{.*}} (0x02)
+; CHECK-NEXT: DW_AT_bit_offset {{.*}} (0x1e)
+; CHECK-NEXT: DW_AT_data_member_location {{.*}} 00
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"b"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_data_member_location {{.*}} 04
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"c"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_byte_size  {{.*}} (0x04)
+; CHECK-NEXT: DW_AT_bit_size   {{.*}} (0x01)
+; CHECK-NEXT: DW_AT_bit_offset {{.*}} (0x1f)
+; CHECK-NEXT: DW_AT_data_member_location {{.*}} 08
+
+; CHECK: DW_TAG_member
+; CHECK-NEXT: DW_AT_name{{.*}}"d"
+; CHECK-NOT: DW_TAG_member
+; CHECK:      DW_AT_byte_size  {{.*}} (0x04)
+; CHECK-NEXT: DW_AT_bit_size   {{.*}} (0x1c)
+; CHECK-NEXT: DW_AT_bit_offset {{.*}} (0x03)
+; CHECK-NEXT: DW_AT_data_member_location {{.*}} 08
+
+; ModuleID = 'bitfields.c'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+%struct.bitfield = type <{ i8, [3 x i8], i64 }>
+
+@b = common global %struct.bitfield zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "bitfields.c", directory: "/")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "b", scope: !0, file: !5, line: 8, type: !6, isLocal: false, isDefinition: true, variable: %struct.bitfield* @b)
+!5 = !DIFile(filename: "bitfields.c", directory: "/")
+!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "bitfield", file: !5, line: 1, size: 96, align: 32, elements: !7)
+!7 = !{!8, !10, !11, !12}
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !6, file: !5, line: 2, baseType: !9, size: 2, align: 32)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !6, file: !5, line: 3, baseType: !9, size: 32, align: 32, offset: 32)
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !6, file: !5, line: 4, baseType: !9, size: 1, align: 32, offset: 64)
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !6, file: !5, line: 5, baseType: !9, size: 28, align: 32, offset: 65)
+!13 = !{i32 2, !"Dwarf Version", i32 2}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{i32 1, !"PIC Level", i32 2}
+!16 = !{!"clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)"}
diff --git a/test/DebugInfo/X86/debug-info-packed-struct.ll b/test/DebugInfo/X86/debug-info-packed-struct.ll
new file mode 100644
index 0000000..6829c2d
--- /dev/null
+++ b/test/DebugInfo/X86/debug-info-packed-struct.ll
@@ -0,0 +1,198 @@
+; Generated from tools/clang/test/CodeGen/debug-info-packed-struct.c
+; ModuleID = 'llvm/tools/clang/test/CodeGen/debug-info-packed-struct.c'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; RUN: %llc_dwarf -O0 -filetype=obj -o %t.o %s
+; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+; REQUIRES: object-emission
+
+;  // ---------------------------------------------------------------------
+;  // Not packed.
+;  // ---------------------------------------------------------------------
+;  struct size8 {
+;    int i : 4;
+;    long long l : 60;
+;  };
+;  struct layout0 {
+;    char l0_ofs0;
+;    struct size8 l0_ofs8;
+;    int l0_ofs16 : 1;
+;  } l0;
+
+%struct.layout0 = type { i8, %struct.size8, i8 }
+%struct.size8 = type { i64 }
+; CHECK:  DW_TAG_structure_type
+; CHECK:      DW_AT_name {{.*}} "layout0"
+; CHECK:      DW_AT_byte_size {{.*}} (0x18)
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l0_ofs0"
+; CHECK:          DW_AT_data_member_location {{.*}}00
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l0_ofs8"
+; CHECK:          DW_AT_data_member_location {{.*}}08
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l0_ofs16"
+; CHECK:          DW_AT_bit_size   {{.*}} (0x01)
+; CHECK:          DW_AT_bit_offset {{.*}} (0x1f)
+; CHECK:          DW_AT_data_member_location {{.*}}10
+
+
+; // ---------------------------------------------------------------------
+; // Implicitly packed.
+; // ---------------------------------------------------------------------
+; struct size8_anon {
+;   int : 4;
+;   long long : 60;
+; };
+; struct layout1 {
+;   char l1_ofs0;
+;   struct size8_anon l1_ofs1;
+;   int l1_ofs9 : 1;
+; } l1;
+
+%struct.layout1 = type <{ i8, %struct.size8_anon, i8, [2 x i8] }>
+%struct.size8_anon = type { i64 }
+
+; CHECK:  DW_TAG_structure_type
+; CHECK:      DW_AT_name {{.*}} "layout1"
+; CHECK:      DW_AT_byte_size {{.*}} (0x0c)
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l1_ofs0"
+; CHECK:          DW_AT_data_member_location {{.*}}00
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l1_ofs1"
+; CHECK:          DW_AT_data_member_location {{.*}}01
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l1_ofs9"
+; CHECK:          DW_AT_byte_size  {{.*}} (0x04)
+; CHECK:          DW_AT_bit_size   {{.*}} (0x01)
+; CHECK:          DW_AT_bit_offset {{.*}} (0x17)
+; CHECK:          DW_AT_data_member_location {{.*}}08
+
+; // ---------------------------------------------------------------------
+; // Explicitly packed.
+; // ---------------------------------------------------------------------
+; #pragma pack(1)
+; struct size8_pack1 {
+;   int i : 4;
+;   long long l : 60;
+; };
+; struct layout2 {
+;   char l2_ofs0;
+;   struct size8_pack1 l2_ofs1;
+;   int l2_ofs9 : 1;
+; } l2;
+; #pragma pack()
+
+%struct.layout2 = type <{ i8, %struct.size8_pack1, i8 }>
+%struct.size8_pack1 = type { i64 }
+
+; CHECK:  DW_TAG_structure_type
+; CHECK:      DW_AT_name {{.*}} "layout2"
+; CHECK:      DW_AT_byte_size {{.*}} (0x0a)
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l2_ofs0"
+; CHECK:          DW_AT_data_member_location {{.*}}00
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l2_ofs1"
+; CHECK:          DW_AT_data_member_location {{.*}}01
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l2_ofs9"
+; CHECK:          DW_AT_byte_size  {{.*}} (0x04)
+; CHECK:          DW_AT_bit_size   {{.*}} (0x01)
+; CHECK:          DW_AT_bit_offset {{.*}} (0x17)
+; CHECK:          DW_AT_data_member_location {{.*}}08
+
+; // ---------------------------------------------------------------------
+; // Explicitly packed with different alignment.
+; // ---------------------------------------------------------------------
+; #pragma pack(4)
+; struct size8_pack4 {
+;   int i : 4;
+;   long long l : 60;
+; };
+; struct layout3 {
+;   char l3_ofs0;
+;   struct size8_pack4 l3_ofs4;
+;   int l3_ofs12 : 1;
+; } l 3;
+; #pragma pack()
+
+
+%struct.layout3 = type <{ i8, [3 x i8], %struct.size8_pack4, i8, [3 x i8] }>
+%struct.size8_pack4 = type { i64 }
+
+; CHECK:  DW_TAG_structure_type
+; CHECK:      DW_AT_name {{.*}} "layout3"
+; CHECK:      DW_AT_byte_size {{.*}} (0x10)
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l3_ofs0"
+; CHECK:          DW_AT_data_member_location {{.*}}00
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l3_ofs4"
+; CHECK:          DW_AT_data_member_location {{.*}}04
+; CHECK:      DW_TAG_member
+; CHECK:          DW_AT_name {{.*}} "l3_ofs12"
+; CHECK:          DW_AT_byte_size  {{.*}} (0x04)
+; CHECK:          DW_AT_bit_size   {{.*}} (0x01)
+; CHECK:          DW_AT_bit_offset {{.*}} (0x1f)
+; CHECK:          DW_AT_data_member_location {{.*}}0c
+
+@l0 = common global %struct.layout0 zeroinitializer, align 8
+@l1 = common global %struct.layout1 zeroinitializer, align 4
+@l2 = common global %struct.layout2 zeroinitializer, align 1
+@l3 = common global %struct.layout3 zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!45, !46}
+!llvm.ident = !{!47}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240791) (llvm/trunk 240790)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "/llvm/tools/clang/test/CodeGen/<stdin>", directory: "/llvm/_build.ninja.release")
+!2 = !{}
+!3 = !{!4, !18, !25, !35}
+!4 = !DIGlobalVariable(name: "l0", scope: !0, file: !5, line: 88, type: !6, isLocal: false, isDefinition: true, variable: %struct.layout0* @l0)
+!5 = !DIFile(filename: "/llvm/tools/clang/test/CodeGen/debug-info-packed-struct.c", directory: "/llvm/_build.ninja.release")
+!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "layout0", file: !5, line: 15, size: 192, align: 64, elements: !7)
+!7 = !{!8, !10, !17}
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "l0_ofs0", scope: !6, file: !5, line: 16, baseType: !9, size: 8, align: 8)
+!9 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "l0_ofs8", scope: !6, file: !5, line: 17, baseType: !11, size: 64, align: 64, offset: 64)
+!11 = !DICompositeType(tag: DW_TAG_structure_type, name: "size8", file: !5, line: 11, size: 64, align: 64, elements: !12)
+!12 = !{!13, !15}
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !11, file: !5, line: 12, baseType: !14, size: 4, align: 32)
+!14 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "l", scope: !11, file: !5, line: 13, baseType: !16, size: 60, offset: 4)
+!16 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "l0_ofs16", scope: !6, file: !5, line: 18, baseType: !14, size: 1, align: 32, offset: 128)
+!18 = !DIGlobalVariable(name: "l1", scope: !0, file: !5, line: 89, type: !19, isLocal: false, isDefinition: true, variable: %struct.layout1* @l1)
+!19 = !DICompositeType(tag: DW_TAG_structure_type, name: "layout1", file: !5, line: 34, size: 96, align: 32, elements: !20)
+!20 = !{!21, !22, !24}
+!21 = !DIDerivedType(tag: DW_TAG_member, name: "l1_ofs0", scope: !19, file: !5, line: 35, baseType: !9, size: 8, align: 8)
+!22 = !DIDerivedType(tag: DW_TAG_member, name: "l1_ofs1", scope: !19, file: !5, line: 36, baseType: !23, size: 64, align: 8, offset: 8)
+!23 = !DICompositeType(tag: DW_TAG_structure_type, name: "size8_anon", file: !5, line: 30, size: 64, align: 8, elements: !2)
+!24 = !DIDerivedType(tag: DW_TAG_member, name: "l1_ofs9", scope: !19, file: !5, line: 37, baseType: !14, size: 1, align: 32, offset: 72)
+!25 = !DIGlobalVariable(name: "l2", scope: !0, file: !5, line: 90, type: !26, isLocal: false, isDefinition: true, variable: %struct.layout2* @l2)
+!26 = !DICompositeType(tag: DW_TAG_structure_type, name: "layout2", file: !5, line: 54, size: 80, align: 8, elements: !27)
+!27 = !{!28, !29, !34}
+!28 = !DIDerivedType(tag: DW_TAG_member, name: "l2_ofs0", scope: !26, file: !5, line: 55, baseType: !9, size: 8, align: 8)
+!29 = !DIDerivedType(tag: DW_TAG_member, name: "l2_ofs1", scope: !26, file: !5, line: 56, baseType: !30, size: 64, align: 8, offset: 8)
+!30 = !DICompositeType(tag: DW_TAG_structure_type, name: "size8_pack1", file: !5, line: 50, size: 64, align: 8, elements: !31)
+!31 = !{!32, !33}
+!32 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !30, file: !5, line: 51, baseType: !14, size: 4, align: 32)
+!33 = !DIDerivedType(tag: DW_TAG_member, name: "l", scope: !30, file: !5, line: 52, baseType: !16, size: 60, offset: 4)
+!34 = !DIDerivedType(tag: DW_TAG_member, name: "l2_ofs9", scope: !26, file: !5, line: 57, baseType: !14, size: 1, align: 32, offset: 72)
+!35 = !DIGlobalVariable(name: "l3", scope: !0, file: !5, line: 91, type: !36, isLocal: false, isDefinition: true, variable: %struct.layout3* @l3)
+!36 = !DICompositeType(tag: DW_TAG_structure_type, name: "layout3", file: !5, line: 76, size: 128, align: 32, elements: !37)
+!37 = !{!38, !39, !44}
+!38 = !DIDerivedType(tag: DW_TAG_member, name: "l3_ofs0", scope: !36, file: !5, line: 77, baseType: !9, size: 8, align: 8)
+!39 = !DIDerivedType(tag: DW_TAG_member, name: "l3_ofs4", scope: !36, file: !5, line: 78, baseType: !40, size: 64, align: 32, offset: 32)
+!40 = !DICompositeType(tag: DW_TAG_structure_type, name: "size8_pack4", file: !5, line: 72, size: 64, align: 32, elements: !41)
+!41 = !{!42, !43}
+!42 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !40, file: !5, line: 73, baseType: !14, size: 4, align: 32)
+!43 = !DIDerivedType(tag: DW_TAG_member, name: "l", scope: !40, file: !5, line: 74, baseType: !16, size: 60, offset: 4)
+!44 = !DIDerivedType(tag: DW_TAG_member, name: "l3_ofs12", scope: !36, file: !5, line: 79, baseType: !14, size: 1, align: 32, offset: 96)
+!45 = !{i32 2, !"Dwarf Version", i32 2}
+!46 = !{i32 2, !"Debug Info Version", i32 3}
+!47 = !{!"clang version 3.7.0 (trunk 240791) (llvm/trunk 240790)"}
diff --git a/test/DebugInfo/X86/debug-loc-empty-entries.ll b/test/DebugInfo/X86/debug-loc-empty-entries.ll
new file mode 100644
index 0000000..3b997fd
--- /dev/null
+++ b/test/DebugInfo/X86/debug-loc-empty-entries.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=x86_64-apple-macosx <%s | FileCheck %s
+; Test that we don't generate empty .debug_loc entries.  Originally, there were
+; two empty .debug_loc entries for 'a' in an otherwise empty .debug_loc list.
+;
+; CHECK:      .section __DWARF,__debug_loc,regular,debug
+; CHECK-NEXT: Lsection_debug_loc:
+; CHECK-NEXT: .section __DWARF,__debug_abbrev,regular,debug
+;
+; Test that the variable stuck around.
+; CHECK:      .section __DWARF,__debug_info,regular,debug
+; CHECK:      DW_TAG_variable
+; CHECK-NOT:  DW_AT_location
+
+; Generated using clang -cc1 with the following args:
+;
+;   -triple x86_64-apple-macosx -emit-llvm -gdwarf-4 -O1
+;
+; From this testcase:
+;
+;;   void fn1() {
+;;     float a = 1;
+;;     for (;;)
+;;       a = 0;
+;;   }
+
+; Function Attrs: noreturn nounwind readnone
+define void @_Z3fn1v() #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata float 1.000000e+00, i64 0, metadata !9, metadata !14), !dbg !15
+  br label %for.cond, !dbg !16
+
+for.cond:                                         ; preds = %for.cond, %entry
+  tail call void @llvm.dbg.value(metadata float 0.000000e+00, i64 0, metadata !9, metadata !14), !dbg !15
+  br label %for.cond, !dbg !17
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { noreturn nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+!llvm.ident = !{!13}
+
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 238517) (llvm/trunk 238524)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "/Users/dexonsmith/data/llvm/bootstrap/play/delta2/testcase")
+!2 = !{}
+!3 = !{!4}
+!4 = !DISubprogram(name: "fn1", linkageName: "_Z3fn1v", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @_Z3fn1v, variables: !8)
+!5 = !DIFile(filename: "t.cpp", directory: "/Users/dexonsmith/data/llvm/bootstrap/play/delta2/testcase")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{!9}
+!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", scope: !4, file: !5, line: 2, type: !10)
+!10 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{!"clang version 3.7.0 (trunk 238517) (llvm/trunk 238524)"}
+!14 = !DIExpression()
+!15 = !DILocation(line: 2, scope: !4)
+!16 = !DILocation(line: 3, scope: !4)
+!17 = !DILocation(line: 3, scope: !18)
+!18 = distinct !DILexicalBlock(scope: !19, file: !5, line: 3)
+!19 = distinct !DILexicalBlock(scope: !4, file: !5, line: 3)
diff --git a/test/DebugInfo/X86/dwarf-public-names.ll b/test/DebugInfo/X86/dwarf-public-names.ll
index 48f13fd..c72da38 100644
--- a/test/DebugInfo/X86/dwarf-public-names.ll
+++ b/test/DebugInfo/X86/dwarf-public-names.ll
@@ -39,11 +39,11 @@
 
 ; Darwin and PS4 shouldn't be generating the section by default
 ; NOPUB: debug_pubnames
-; NOPUB: {{^$}}
+; NOPUB-NEXT: {{^$}}
 
 ; Skip the output to the header of the pubnames section.
 ; LINUX: debug_pubnames
-; LINUX: unit_size = 0x00000128
+; LINUX-NEXT: unit_size = 0x00000128
 
 ; Check for each name in the output.
 ; LINUX-DAG: "ns"
diff --git a/test/DebugInfo/dwarfdump-invalid.test b/test/DebugInfo/dwarfdump-invalid.test
index da5b23e..a36ad2f 100644
--- a/test/DebugInfo/dwarfdump-invalid.test
+++ b/test/DebugInfo/dwarfdump-invalid.test
@@ -1,6 +1,6 @@
 ; Verify that llvm-dwarfdump doesn't crash on broken input files.
 
-RUN: llvm-dwarfdump %p/Inputs/invalid.elf 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
-RUN: llvm-dwarfdump %p/Inputs/invalid.elf.2 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
-RUN: llvm-dwarfdump %p/Inputs/invalid.elf.3 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
+RUN: not llvm-dwarfdump %p/Inputs/invalid.elf 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
+RUN: not llvm-dwarfdump %p/Inputs/invalid.elf.2 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
+RUN: not llvm-dwarfdump %p/Inputs/invalid.elf.3 2>&1 | FileCheck %s --check-prefix=INVALID-ELF
 INVALID-ELF: Invalid data was encountered while parsing the file
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll b/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll
new file mode 100644
index 0000000..7827f3f
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -asan -S -o %t.ll
+; RUN: FileCheck %s < %t.ll
+
+; Don't do stack malloc on functions containing inline assembly on 64-bit
+; platforms. It makes LLVM run out of registers.
+
+; CHECK-LABEL: define void @TestAbsenceOfStackMalloc(i8* %S, i32 %pS, i8* %D, i32 %pD, i32 %h)
+; CHECK: %MyAlloca
+; CHECK-NOT: call {{.*}} @__asan_stack_malloc
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define void @TestAbsenceOfStackMalloc(i8* %S, i32 %pS, i8* %D, i32 %pD, i32 %h) #0 {
+entry:
+  %S.addr = alloca i8*, align 8
+  %pS.addr = alloca i32, align 4
+  %D.addr = alloca i8*, align 8
+  %pD.addr = alloca i32, align 4
+  %h.addr = alloca i32, align 4
+  %sr = alloca i32, align 4
+  %pDiffD = alloca i32, align 4
+  %pDiffS = alloca i32, align 4
+  %flagSA = alloca i8, align 1
+  %flagDA = alloca i8, align 1
+  store i8* %S, i8** %S.addr, align 8
+  store i32 %pS, i32* %pS.addr, align 4
+  store i8* %D, i8** %D.addr, align 8
+  store i32 %pD, i32* %pD.addr, align 4
+  store i32 %h, i32* %h.addr, align 4
+  store i32 4, i32* %sr, align 4
+  %0 = load i32, i32* %pD.addr, align 4
+  %sub = sub i32 %0, 5
+  store i32 %sub, i32* %pDiffD, align 4
+  %1 = load i32, i32* %pS.addr, align 4
+  %shl = shl i32 %1, 1
+  %sub1 = sub i32 %shl, 5
+  store i32 %sub1, i32* %pDiffS, align 4
+  %2 = load i32, i32* %pS.addr, align 4
+  %and = and i32 %2, 15
+  %cmp = icmp eq i32 %and, 0
+  %conv = zext i1 %cmp to i32
+  %conv2 = trunc i32 %conv to i8
+  store i8 %conv2, i8* %flagSA, align 1
+  %3 = load i32, i32* %pD.addr, align 4
+  %and3 = and i32 %3, 15
+  %cmp4 = icmp eq i32 %and3, 0
+  %conv5 = zext i1 %cmp4 to i32
+  %conv6 = trunc i32 %conv5 to i8
+  store i8 %conv6, i8* %flagDA, align 1
+  call void asm sideeffect "mov\09\09\09$0,\09\09\09\09\09\09\09\09\09\09%rsi\0Amov\09\09\09$2,\09\09\09\09\09\09\09\09\09\09%rcx\0Amov\09\09\09$1,\09\09\09\09\09\09\09\09\09\09%rdi\0Amov\09\09\09$8,\09\09\09\09\09\09\09\09\09\09%rax\0A", "*m,*m,*m,*m,*m,*m,*m,*m,*m,~{rsi},~{rdi},~{rax},~{rcx},~{rdx},~{memory},~{dirflag},~{fpsr},~{flags}"(i8** %S.addr, i8** %D.addr, i32* %pS.addr, i32* %pDiffS, i32* %pDiffD, i32* %sr, i8* %flagSA, i8* %flagDA, i32* %h.addr) #1
+  ret void
+}
+
+attributes #0 = { nounwind sanitize_address }
+attributes #1 = { nounwind }
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll
index 7617dbd..f67155a 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll
@@ -1,12 +1,15 @@
-; This test checks that we are not instrumenting globals in llvm.metadata.
+; This test checks that we are not instrumenting globals in llvm.metadata
+; and other llvm internal globals.
 ; RUN: opt < %s -asan -asan-module -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.10.0"
 
 @.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
+@.str_noinst_prof = private unnamed_addr constant [4 x i8] c"aaa\00", section "__DATA,__llvm_covmap"
 @.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
 
 ; CHECK-NOT: {{asan_gen.*str_noinst}}
+; CHECK-NOT: {{asan_gen.*str_noinst_prof}}
 ; CHECK: {{asan_gen.*str_inst}}
 ; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
index d02f12a..93eca5b 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
@@ -1,12 +1,15 @@
-; This test checks that we are not instrumenting globals in llvm.metadata.
+; This test checks that we are not instrumenting globals in llvm.metadata
+; and other llvm internal globals.
 ; RUN: opt < %s -asan -asan-module -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 @.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
+@.str_noinst_prof = private unnamed_addr constant [4 x i8] c"aaa\00", section "__llvm_prf_data"
 @.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
 
 ; CHECK-NOT: {{asan_gen.*str_noinst}}
+; CHECK-NOT: {{asan_gen.*str_noinst_prof}}
 ; CHECK: {{asan_gen.*str_inst}}
 ; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/SanitizerCoverage/coverage.ll b/test/Instrumentation/SanitizerCoverage/coverage.ll
index b2f0ab0..659c030 100644
--- a/test/Instrumentation/SanitizerCoverage/coverage.ll
+++ b/test/Instrumentation/SanitizerCoverage/coverage.ll
@@ -119,3 +119,12 @@ entry:
 ; CHECK4: call void @__sanitizer_cov_indir_call16({{.*}},[[CACHE:.*]])
 ; CHECK4-NOT: call void @__sanitizer_cov_indir_call16({{.*}},[[CACHE]])
 ; CHECK4: ret void
+
+define void @call_unreachable() uwtable sanitize_address {
+entry:
+  unreachable
+}
+
+; CHECK4-LABEL: define void @call_unreachable
+; CHECK4-NOT: __sanitizer_cov
+; CHECK4: unreachable
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
index 1d6ac38..db01bab 100644
--- a/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -4,1984 +4,1995 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 define i8 @atomic8_load_unordered(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8, i8* %a unordered, align 1
-  ret i8 %0
+  %0 = load atomic i8, i8* %a unordered, align 1, !dbg !7
+  ret i8 %0, !dbg !7
 }
-; CHECK: atomic8_load_unordered
-; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 0)
+; CHECK-LABEL: atomic8_load_unordered
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 0), !dbg
 
 define i8 @atomic8_load_monotonic(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8, i8* %a monotonic, align 1
-  ret i8 %0
+  %0 = load atomic i8, i8* %a monotonic, align 1, !dbg !7
+  ret i8 %0, !dbg !7
 }
-; CHECK: atomic8_load_monotonic
-; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 0)
+; CHECK-LABEL: atomic8_load_monotonic
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 0), !dbg
 
 define i8 @atomic8_load_acquire(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8, i8* %a acquire, align 1
-  ret i8 %0
+  %0 = load atomic i8, i8* %a acquire, align 1, !dbg !7
+  ret i8 %0, !dbg !7
 }
-; CHECK: atomic8_load_acquire
-; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 2)
+; CHECK-LABEL: atomic8_load_acquire
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 2), !dbg
 
 define i8 @atomic8_load_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8, i8* %a seq_cst, align 1
-  ret i8 %0
+  %0 = load atomic i8, i8* %a seq_cst, align 1, !dbg !7
+  ret i8 %0, !dbg !7
 }
-; CHECK: atomic8_load_seq_cst
-; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 5)
+; CHECK-LABEL: atomic8_load_seq_cst
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 5), !dbg
 
 define void @atomic8_store_unordered(i8* %a) nounwind uwtable {
 entry:
-  store atomic i8 0, i8* %a unordered, align 1
-  ret void
+  store atomic i8 0, i8* %a unordered, align 1, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_store_unordered
-; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_store_unordered
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_store_monotonic(i8* %a) nounwind uwtable {
 entry:
-  store atomic i8 0, i8* %a monotonic, align 1
-  ret void
+  store atomic i8 0, i8* %a monotonic, align 1, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_store_monotonic
-; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_store_monotonic
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_store_release(i8* %a) nounwind uwtable {
 entry:
-  store atomic i8 0, i8* %a release, align 1
-  ret void
+  store atomic i8 0, i8* %a release, align 1, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_store_release
-; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_store_release
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_store_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  store atomic i8 0, i8* %a seq_cst, align 1
-  ret void
+  store atomic i8 0, i8* %a seq_cst, align 1, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_store_seq_cst
-; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_store_seq_cst
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_xchg_monotonic(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i8* %a, i8 0 monotonic
-  ret void
+  atomicrmw xchg i8* %a, i8 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xchg_monotonic
-; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_xchg_monotonic
+; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_add_monotonic(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw add i8* %a, i8 0 monotonic
-  ret void
+  atomicrmw add i8* %a, i8 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_add_monotonic
-; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_add_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_sub_monotonic(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i8* %a, i8 0 monotonic
-  ret void
+  atomicrmw sub i8* %a, i8 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_sub_monotonic
-; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_sub_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_and_monotonic(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw and i8* %a, i8 0 monotonic
-  ret void
+  atomicrmw and i8* %a, i8 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_and_monotonic
-; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_and_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_or_monotonic(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw or i8* %a, i8 0 monotonic
-  ret void
+  atomicrmw or i8* %a, i8 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_or_monotonic
-; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_or_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_xor_monotonic(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i8* %a, i8 0 monotonic
-  ret void
+  atomicrmw xor i8* %a, i8 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xor_monotonic
-; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_xor_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_nand_monotonic(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i8* %a, i8 0 monotonic
-  ret void
+  atomicrmw nand i8* %a, i8 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_nand_monotonic
-; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 0)
+; CHECK-LABEL: atomic8_nand_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 0), !dbg
 
 define void @atomic8_xchg_acquire(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i8* %a, i8 0 acquire
-  ret void
+  atomicrmw xchg i8* %a, i8 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xchg_acquire
-; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 2)
+; CHECK-LABEL: atomic8_xchg_acquire
+; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 2), !dbg
 
 define void @atomic8_add_acquire(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw add i8* %a, i8 0 acquire
-  ret void
+  atomicrmw add i8* %a, i8 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_add_acquire
-; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 2)
+; CHECK-LABEL: atomic8_add_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 2), !dbg
 
 define void @atomic8_sub_acquire(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i8* %a, i8 0 acquire
-  ret void
+  atomicrmw sub i8* %a, i8 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_sub_acquire
-; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 2)
+; CHECK-LABEL: atomic8_sub_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 2), !dbg
 
 define void @atomic8_and_acquire(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw and i8* %a, i8 0 acquire
-  ret void
+  atomicrmw and i8* %a, i8 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_and_acquire
-; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 2)
+; CHECK-LABEL: atomic8_and_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 2), !dbg
 
 define void @atomic8_or_acquire(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw or i8* %a, i8 0 acquire
-  ret void
+  atomicrmw or i8* %a, i8 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_or_acquire
-; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 2)
+; CHECK-LABEL: atomic8_or_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 2), !dbg
 
 define void @atomic8_xor_acquire(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i8* %a, i8 0 acquire
-  ret void
+  atomicrmw xor i8* %a, i8 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xor_acquire
-; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 2)
+; CHECK-LABEL: atomic8_xor_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 2), !dbg
 
 define void @atomic8_nand_acquire(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i8* %a, i8 0 acquire
-  ret void
+  atomicrmw nand i8* %a, i8 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_nand_acquire
-; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 2)
+; CHECK-LABEL: atomic8_nand_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 2), !dbg
 
 define void @atomic8_xchg_release(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i8* %a, i8 0 release
-  ret void
+  atomicrmw xchg i8* %a, i8 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xchg_release
-; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_xchg_release
+; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_add_release(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw add i8* %a, i8 0 release
-  ret void
+  atomicrmw add i8* %a, i8 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_add_release
-; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_add_release
+; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_sub_release(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i8* %a, i8 0 release
-  ret void
+  atomicrmw sub i8* %a, i8 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_sub_release
-; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_sub_release
+; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_and_release(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw and i8* %a, i8 0 release
-  ret void
+  atomicrmw and i8* %a, i8 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_and_release
-; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_and_release
+; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_or_release(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw or i8* %a, i8 0 release
-  ret void
+  atomicrmw or i8* %a, i8 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_or_release
-; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_or_release
+; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_xor_release(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i8* %a, i8 0 release
-  ret void
+  atomicrmw xor i8* %a, i8 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xor_release
-; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_xor_release
+; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_nand_release(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i8* %a, i8 0 release
-  ret void
+  atomicrmw nand i8* %a, i8 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_nand_release
-; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 3)
+; CHECK-LABEL: atomic8_nand_release
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 3), !dbg
 
 define void @atomic8_xchg_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i8* %a, i8 0 acq_rel
-  ret void
+  atomicrmw xchg i8* %a, i8 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xchg_acq_rel
-; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 4)
+; CHECK-LABEL: atomic8_xchg_acq_rel
+; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 4), !dbg
 
 define void @atomic8_add_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw add i8* %a, i8 0 acq_rel
-  ret void
+  atomicrmw add i8* %a, i8 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_add_acq_rel
-; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 4)
+; CHECK-LABEL: atomic8_add_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 4), !dbg
 
 define void @atomic8_sub_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i8* %a, i8 0 acq_rel
-  ret void
+  atomicrmw sub i8* %a, i8 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_sub_acq_rel
-; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 4)
+; CHECK-LABEL: atomic8_sub_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 4), !dbg
 
 define void @atomic8_and_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw and i8* %a, i8 0 acq_rel
-  ret void
+  atomicrmw and i8* %a, i8 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_and_acq_rel
-; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 4)
+; CHECK-LABEL: atomic8_and_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 4), !dbg
 
 define void @atomic8_or_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw or i8* %a, i8 0 acq_rel
-  ret void
+  atomicrmw or i8* %a, i8 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_or_acq_rel
-; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 4)
+; CHECK-LABEL: atomic8_or_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 4), !dbg
 
 define void @atomic8_xor_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i8* %a, i8 0 acq_rel
-  ret void
+  atomicrmw xor i8* %a, i8 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xor_acq_rel
-; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 4)
+; CHECK-LABEL: atomic8_xor_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 4), !dbg
 
 define void @atomic8_nand_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i8* %a, i8 0 acq_rel
-  ret void
+  atomicrmw nand i8* %a, i8 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_nand_acq_rel
-; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 4)
+; CHECK-LABEL: atomic8_nand_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 4), !dbg
 
 define void @atomic8_xchg_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i8* %a, i8 0 seq_cst
-  ret void
+  atomicrmw xchg i8* %a, i8 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xchg_seq_cst
-; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_xchg_seq_cst
+; CHECK: call i8 @__tsan_atomic8_exchange(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_add_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw add i8* %a, i8 0 seq_cst
-  ret void
+  atomicrmw add i8* %a, i8 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_add_seq_cst
-; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_add_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_add(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_sub_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i8* %a, i8 0 seq_cst
-  ret void
+  atomicrmw sub i8* %a, i8 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_sub_seq_cst
-; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_sub_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_sub(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_and_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw and i8* %a, i8 0 seq_cst
-  ret void
+  atomicrmw and i8* %a, i8 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_and_seq_cst
-; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_and_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_and(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_or_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw or i8* %a, i8 0 seq_cst
-  ret void
+  atomicrmw or i8* %a, i8 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_or_seq_cst
-; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_or_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_or(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_xor_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i8* %a, i8 0 seq_cst
-  ret void
+  atomicrmw xor i8* %a, i8 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_xor_seq_cst
-; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_xor_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_nand_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i8* %a, i8 0 seq_cst
-  ret void
+  atomicrmw nand i8* %a, i8 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_nand_seq_cst
-; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 5)
+; CHECK-LABEL: atomic8_nand_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 5), !dbg
 
 define void @atomic8_cas_monotonic(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 monotonic monotonic
-  ret void
+  cmpxchg i8* %a, i8 0, i8 1 monotonic monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_cas_monotonic
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 0, i32 0)
+; CHECK-LABEL: atomic8_cas_monotonic
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 0, i32 0), !dbg
 
 define void @atomic8_cas_acquire(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 acquire acquire
-  ret void
+  cmpxchg i8* %a, i8 0, i8 1 acquire acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_cas_acquire
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 2, i32 2)
+; CHECK-LABEL: atomic8_cas_acquire
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 2, i32 2), !dbg
 
 define void @atomic8_cas_release(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 release monotonic
-  ret void
+  cmpxchg i8* %a, i8 0, i8 1 release monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_cas_release
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 3, i32 0)
+; CHECK-LABEL: atomic8_cas_release
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 3, i32 0), !dbg
 
 define void @atomic8_cas_acq_rel(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 acq_rel acquire
-  ret void
+  cmpxchg i8* %a, i8 0, i8 1 acq_rel acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_cas_acq_rel
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 4, i32 2)
+; CHECK-LABEL: atomic8_cas_acq_rel
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 4, i32 2), !dbg
 
 define void @atomic8_cas_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  cmpxchg i8* %a, i8 0, i8 1 seq_cst seq_cst
-  ret void
+  cmpxchg i8* %a, i8 0, i8 1 seq_cst seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic8_cas_seq_cst
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 5, i32 5)
+; CHECK-LABEL: atomic8_cas_seq_cst
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 5, i32 5), !dbg
 
 define i16 @atomic16_load_unordered(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16, i16* %a unordered, align 2
-  ret i16 %0
+  %0 = load atomic i16, i16* %a unordered, align 2, !dbg !7
+  ret i16 %0, !dbg !7
 }
-; CHECK: atomic16_load_unordered
-; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 0)
+; CHECK-LABEL: atomic16_load_unordered
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 0), !dbg
 
 define i16 @atomic16_load_monotonic(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16, i16* %a monotonic, align 2
-  ret i16 %0
+  %0 = load atomic i16, i16* %a monotonic, align 2, !dbg !7
+  ret i16 %0, !dbg !7
 }
-; CHECK: atomic16_load_monotonic
-; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 0)
+; CHECK-LABEL: atomic16_load_monotonic
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 0), !dbg
 
 define i16 @atomic16_load_acquire(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16, i16* %a acquire, align 2
-  ret i16 %0
+  %0 = load atomic i16, i16* %a acquire, align 2, !dbg !7
+  ret i16 %0, !dbg !7
 }
-; CHECK: atomic16_load_acquire
-; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 2)
+; CHECK-LABEL: atomic16_load_acquire
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 2), !dbg
 
 define i16 @atomic16_load_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16, i16* %a seq_cst, align 2
-  ret i16 %0
+  %0 = load atomic i16, i16* %a seq_cst, align 2, !dbg !7
+  ret i16 %0, !dbg !7
 }
-; CHECK: atomic16_load_seq_cst
-; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 5)
+; CHECK-LABEL: atomic16_load_seq_cst
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 5), !dbg
 
 define void @atomic16_store_unordered(i16* %a) nounwind uwtable {
 entry:
-  store atomic i16 0, i16* %a unordered, align 2
-  ret void
+  store atomic i16 0, i16* %a unordered, align 2, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_store_unordered
-; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_store_unordered
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_store_monotonic(i16* %a) nounwind uwtable {
 entry:
-  store atomic i16 0, i16* %a monotonic, align 2
-  ret void
+  store atomic i16 0, i16* %a monotonic, align 2, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_store_monotonic
-; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_store_monotonic
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_store_release(i16* %a) nounwind uwtable {
 entry:
-  store atomic i16 0, i16* %a release, align 2
-  ret void
+  store atomic i16 0, i16* %a release, align 2, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_store_release
-; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_store_release
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_store_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  store atomic i16 0, i16* %a seq_cst, align 2
-  ret void
+  store atomic i16 0, i16* %a seq_cst, align 2, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_store_seq_cst
-; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_store_seq_cst
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_xchg_monotonic(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i16* %a, i16 0 monotonic
-  ret void
+  atomicrmw xchg i16* %a, i16 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xchg_monotonic
-; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_xchg_monotonic
+; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_add_monotonic(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw add i16* %a, i16 0 monotonic
-  ret void
+  atomicrmw add i16* %a, i16 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_add_monotonic
-; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_add_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_sub_monotonic(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i16* %a, i16 0 monotonic
-  ret void
+  atomicrmw sub i16* %a, i16 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_sub_monotonic
-; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_sub_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_and_monotonic(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw and i16* %a, i16 0 monotonic
-  ret void
+  atomicrmw and i16* %a, i16 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_and_monotonic
-; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_and_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_or_monotonic(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw or i16* %a, i16 0 monotonic
-  ret void
+  atomicrmw or i16* %a, i16 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_or_monotonic
-; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_or_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_xor_monotonic(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i16* %a, i16 0 monotonic
-  ret void
+  atomicrmw xor i16* %a, i16 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xor_monotonic
-; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_xor_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_nand_monotonic(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i16* %a, i16 0 monotonic
-  ret void
+  atomicrmw nand i16* %a, i16 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_nand_monotonic
-; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 0)
+; CHECK-LABEL: atomic16_nand_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 0), !dbg
 
 define void @atomic16_xchg_acquire(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i16* %a, i16 0 acquire
-  ret void
+  atomicrmw xchg i16* %a, i16 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xchg_acquire
-; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 2)
+; CHECK-LABEL: atomic16_xchg_acquire
+; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 2), !dbg
 
 define void @atomic16_add_acquire(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw add i16* %a, i16 0 acquire
-  ret void
+  atomicrmw add i16* %a, i16 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_add_acquire
-; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 2)
+; CHECK-LABEL: atomic16_add_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 2), !dbg
 
 define void @atomic16_sub_acquire(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i16* %a, i16 0 acquire
-  ret void
+  atomicrmw sub i16* %a, i16 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_sub_acquire
-; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 2)
+; CHECK-LABEL: atomic16_sub_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 2), !dbg
 
 define void @atomic16_and_acquire(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw and i16* %a, i16 0 acquire
-  ret void
+  atomicrmw and i16* %a, i16 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_and_acquire
-; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 2)
+; CHECK-LABEL: atomic16_and_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 2), !dbg
 
 define void @atomic16_or_acquire(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw or i16* %a, i16 0 acquire
-  ret void
+  atomicrmw or i16* %a, i16 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_or_acquire
-; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 2)
+; CHECK-LABEL: atomic16_or_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 2), !dbg
 
 define void @atomic16_xor_acquire(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i16* %a, i16 0 acquire
-  ret void
+  atomicrmw xor i16* %a, i16 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xor_acquire
-; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 2)
+; CHECK-LABEL: atomic16_xor_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 2), !dbg
 
 define void @atomic16_nand_acquire(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i16* %a, i16 0 acquire
-  ret void
+  atomicrmw nand i16* %a, i16 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_nand_acquire
-; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 2)
+; CHECK-LABEL: atomic16_nand_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 2), !dbg
 
 define void @atomic16_xchg_release(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i16* %a, i16 0 release
-  ret void
+  atomicrmw xchg i16* %a, i16 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xchg_release
-; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_xchg_release
+; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_add_release(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw add i16* %a, i16 0 release
-  ret void
+  atomicrmw add i16* %a, i16 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_add_release
-; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_add_release
+; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_sub_release(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i16* %a, i16 0 release
-  ret void
+  atomicrmw sub i16* %a, i16 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_sub_release
-; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_sub_release
+; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_and_release(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw and i16* %a, i16 0 release
-  ret void
+  atomicrmw and i16* %a, i16 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_and_release
-; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_and_release
+; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_or_release(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw or i16* %a, i16 0 release
-  ret void
+  atomicrmw or i16* %a, i16 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_or_release
-; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_or_release
+; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_xor_release(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i16* %a, i16 0 release
-  ret void
+  atomicrmw xor i16* %a, i16 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xor_release
-; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_xor_release
+; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_nand_release(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i16* %a, i16 0 release
-  ret void
+  atomicrmw nand i16* %a, i16 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_nand_release
-; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 3)
+; CHECK-LABEL: atomic16_nand_release
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 3), !dbg
 
 define void @atomic16_xchg_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i16* %a, i16 0 acq_rel
-  ret void
+  atomicrmw xchg i16* %a, i16 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xchg_acq_rel
-; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 4)
+; CHECK-LABEL: atomic16_xchg_acq_rel
+; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 4), !dbg
 
 define void @atomic16_add_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw add i16* %a, i16 0 acq_rel
-  ret void
+  atomicrmw add i16* %a, i16 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_add_acq_rel
-; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 4)
+; CHECK-LABEL: atomic16_add_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 4), !dbg
 
 define void @atomic16_sub_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i16* %a, i16 0 acq_rel
-  ret void
+  atomicrmw sub i16* %a, i16 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_sub_acq_rel
-; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 4)
+; CHECK-LABEL: atomic16_sub_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 4), !dbg
 
 define void @atomic16_and_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw and i16* %a, i16 0 acq_rel
-  ret void
+  atomicrmw and i16* %a, i16 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_and_acq_rel
-; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 4)
+; CHECK-LABEL: atomic16_and_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 4), !dbg
 
 define void @atomic16_or_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw or i16* %a, i16 0 acq_rel
-  ret void
+  atomicrmw or i16* %a, i16 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_or_acq_rel
-; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 4)
+; CHECK-LABEL: atomic16_or_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 4), !dbg
 
 define void @atomic16_xor_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i16* %a, i16 0 acq_rel
-  ret void
+  atomicrmw xor i16* %a, i16 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xor_acq_rel
-; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 4)
+; CHECK-LABEL: atomic16_xor_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 4), !dbg
 
 define void @atomic16_nand_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i16* %a, i16 0 acq_rel
-  ret void
+  atomicrmw nand i16* %a, i16 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_nand_acq_rel
-; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 4)
+; CHECK-LABEL: atomic16_nand_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 4), !dbg
 
 define void @atomic16_xchg_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i16* %a, i16 0 seq_cst
-  ret void
+  atomicrmw xchg i16* %a, i16 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xchg_seq_cst
-; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_xchg_seq_cst
+; CHECK: call i16 @__tsan_atomic16_exchange(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_add_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw add i16* %a, i16 0 seq_cst
-  ret void
+  atomicrmw add i16* %a, i16 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_add_seq_cst
-; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_add_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_add(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_sub_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i16* %a, i16 0 seq_cst
-  ret void
+  atomicrmw sub i16* %a, i16 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_sub_seq_cst
-; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_sub_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_sub(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_and_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw and i16* %a, i16 0 seq_cst
-  ret void
+  atomicrmw and i16* %a, i16 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_and_seq_cst
-; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_and_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_and(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_or_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw or i16* %a, i16 0 seq_cst
-  ret void
+  atomicrmw or i16* %a, i16 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_or_seq_cst
-; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_or_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_or(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_xor_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i16* %a, i16 0 seq_cst
-  ret void
+  atomicrmw xor i16* %a, i16 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_xor_seq_cst
-; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_xor_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_nand_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i16* %a, i16 0 seq_cst
-  ret void
+  atomicrmw nand i16* %a, i16 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_nand_seq_cst
-; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 5)
+; CHECK-LABEL: atomic16_nand_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 5), !dbg
 
 define void @atomic16_cas_monotonic(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 monotonic monotonic
-  ret void
+  cmpxchg i16* %a, i16 0, i16 1 monotonic monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_cas_monotonic
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 0, i32 0)
+; CHECK-LABEL: atomic16_cas_monotonic
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 0, i32 0), !dbg
 
 define void @atomic16_cas_acquire(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 acquire acquire
-  ret void
+  cmpxchg i16* %a, i16 0, i16 1 acquire acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_cas_acquire
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 2, i32 2)
+; CHECK-LABEL: atomic16_cas_acquire
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 2, i32 2), !dbg
 
 define void @atomic16_cas_release(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 release monotonic
-  ret void
+  cmpxchg i16* %a, i16 0, i16 1 release monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_cas_release
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 3, i32 0)
+; CHECK-LABEL: atomic16_cas_release
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 3, i32 0), !dbg
 
 define void @atomic16_cas_acq_rel(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 acq_rel acquire
-  ret void
+  cmpxchg i16* %a, i16 0, i16 1 acq_rel acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_cas_acq_rel
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 4, i32 2)
+; CHECK-LABEL: atomic16_cas_acq_rel
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 4, i32 2), !dbg
 
 define void @atomic16_cas_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  cmpxchg i16* %a, i16 0, i16 1 seq_cst seq_cst
-  ret void
+  cmpxchg i16* %a, i16 0, i16 1 seq_cst seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic16_cas_seq_cst
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 5, i32 5)
+; CHECK-LABEL: atomic16_cas_seq_cst
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 5, i32 5), !dbg
 
 define i32 @atomic32_load_unordered(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32, i32* %a unordered, align 4
-  ret i32 %0
+  %0 = load atomic i32, i32* %a unordered, align 4, !dbg !7
+  ret i32 %0, !dbg !7
 }
-; CHECK: atomic32_load_unordered
-; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 0)
+; CHECK-LABEL: atomic32_load_unordered
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 0), !dbg
 
 define i32 @atomic32_load_monotonic(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32, i32* %a monotonic, align 4
-  ret i32 %0
+  %0 = load atomic i32, i32* %a monotonic, align 4, !dbg !7
+  ret i32 %0, !dbg !7
 }
-; CHECK: atomic32_load_monotonic
-; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 0)
+; CHECK-LABEL: atomic32_load_monotonic
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 0), !dbg
 
 define i32 @atomic32_load_acquire(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32, i32* %a acquire, align 4
-  ret i32 %0
+  %0 = load atomic i32, i32* %a acquire, align 4, !dbg !7
+  ret i32 %0, !dbg !7
 }
-; CHECK: atomic32_load_acquire
-; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 2)
+; CHECK-LABEL: atomic32_load_acquire
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 2), !dbg
 
 define i32 @atomic32_load_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32, i32* %a seq_cst, align 4
-  ret i32 %0
+  %0 = load atomic i32, i32* %a seq_cst, align 4, !dbg !7
+  ret i32 %0, !dbg !7
 }
-; CHECK: atomic32_load_seq_cst
-; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 5)
+; CHECK-LABEL: atomic32_load_seq_cst
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 5), !dbg
 
 define void @atomic32_store_unordered(i32* %a) nounwind uwtable {
 entry:
-  store atomic i32 0, i32* %a unordered, align 4
-  ret void
+  store atomic i32 0, i32* %a unordered, align 4, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_store_unordered
-; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_store_unordered
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_store_monotonic(i32* %a) nounwind uwtable {
 entry:
-  store atomic i32 0, i32* %a monotonic, align 4
-  ret void
+  store atomic i32 0, i32* %a monotonic, align 4, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_store_monotonic
-; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_store_monotonic
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_store_release(i32* %a) nounwind uwtable {
 entry:
-  store atomic i32 0, i32* %a release, align 4
-  ret void
+  store atomic i32 0, i32* %a release, align 4, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_store_release
-; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_store_release
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_store_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  store atomic i32 0, i32* %a seq_cst, align 4
-  ret void
+  store atomic i32 0, i32* %a seq_cst, align 4, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_store_seq_cst
-; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_store_seq_cst
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_xchg_monotonic(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i32* %a, i32 0 monotonic
-  ret void
+  atomicrmw xchg i32* %a, i32 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xchg_monotonic
-; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_xchg_monotonic
+; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_add_monotonic(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw add i32* %a, i32 0 monotonic
-  ret void
+  atomicrmw add i32* %a, i32 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_add_monotonic
-; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_add_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_sub_monotonic(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i32* %a, i32 0 monotonic
-  ret void
+  atomicrmw sub i32* %a, i32 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_sub_monotonic
-; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_sub_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_and_monotonic(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw and i32* %a, i32 0 monotonic
-  ret void
+  atomicrmw and i32* %a, i32 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_and_monotonic
-; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_and_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_or_monotonic(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw or i32* %a, i32 0 monotonic
-  ret void
+  atomicrmw or i32* %a, i32 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_or_monotonic
-; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_or_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_xor_monotonic(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i32* %a, i32 0 monotonic
-  ret void
+  atomicrmw xor i32* %a, i32 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xor_monotonic
-; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_xor_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_nand_monotonic(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i32* %a, i32 0 monotonic
-  ret void
+  atomicrmw nand i32* %a, i32 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_nand_monotonic
-; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 0)
+; CHECK-LABEL: atomic32_nand_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 0), !dbg
 
 define void @atomic32_xchg_acquire(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i32* %a, i32 0 acquire
-  ret void
+  atomicrmw xchg i32* %a, i32 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xchg_acquire
-; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 2)
+; CHECK-LABEL: atomic32_xchg_acquire
+; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 2), !dbg
 
 define void @atomic32_add_acquire(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw add i32* %a, i32 0 acquire
-  ret void
+  atomicrmw add i32* %a, i32 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_add_acquire
-; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 2)
+; CHECK-LABEL: atomic32_add_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 2), !dbg
 
 define void @atomic32_sub_acquire(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i32* %a, i32 0 acquire
-  ret void
+  atomicrmw sub i32* %a, i32 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_sub_acquire
-; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 2)
+; CHECK-LABEL: atomic32_sub_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 2), !dbg
 
 define void @atomic32_and_acquire(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw and i32* %a, i32 0 acquire
-  ret void
+  atomicrmw and i32* %a, i32 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_and_acquire
-; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 2)
+; CHECK-LABEL: atomic32_and_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 2), !dbg
 
 define void @atomic32_or_acquire(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw or i32* %a, i32 0 acquire
-  ret void
+  atomicrmw or i32* %a, i32 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_or_acquire
-; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 2)
+; CHECK-LABEL: atomic32_or_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 2), !dbg
 
 define void @atomic32_xor_acquire(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i32* %a, i32 0 acquire
-  ret void
+  atomicrmw xor i32* %a, i32 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xor_acquire
-; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 2)
+; CHECK-LABEL: atomic32_xor_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 2), !dbg
 
 define void @atomic32_nand_acquire(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i32* %a, i32 0 acquire
-  ret void
+  atomicrmw nand i32* %a, i32 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_nand_acquire
-; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 2)
+; CHECK-LABEL: atomic32_nand_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 2), !dbg
 
 define void @atomic32_xchg_release(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i32* %a, i32 0 release
-  ret void
+  atomicrmw xchg i32* %a, i32 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xchg_release
-; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_xchg_release
+; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_add_release(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw add i32* %a, i32 0 release
-  ret void
+  atomicrmw add i32* %a, i32 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_add_release
-; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_add_release
+; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_sub_release(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i32* %a, i32 0 release
-  ret void
+  atomicrmw sub i32* %a, i32 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_sub_release
-; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_sub_release
+; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_and_release(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw and i32* %a, i32 0 release
-  ret void
+  atomicrmw and i32* %a, i32 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_and_release
-; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_and_release
+; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_or_release(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw or i32* %a, i32 0 release
-  ret void
+  atomicrmw or i32* %a, i32 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_or_release
-; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_or_release
+; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_xor_release(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i32* %a, i32 0 release
-  ret void
+  atomicrmw xor i32* %a, i32 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xor_release
-; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_xor_release
+; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_nand_release(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i32* %a, i32 0 release
-  ret void
+  atomicrmw nand i32* %a, i32 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_nand_release
-; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 3)
+; CHECK-LABEL: atomic32_nand_release
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 3), !dbg
 
 define void @atomic32_xchg_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i32* %a, i32 0 acq_rel
-  ret void
+  atomicrmw xchg i32* %a, i32 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xchg_acq_rel
-; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 4)
+; CHECK-LABEL: atomic32_xchg_acq_rel
+; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 4), !dbg
 
 define void @atomic32_add_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw add i32* %a, i32 0 acq_rel
-  ret void
+  atomicrmw add i32* %a, i32 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_add_acq_rel
-; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 4)
+; CHECK-LABEL: atomic32_add_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 4), !dbg
 
 define void @atomic32_sub_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i32* %a, i32 0 acq_rel
-  ret void
+  atomicrmw sub i32* %a, i32 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_sub_acq_rel
-; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 4)
+; CHECK-LABEL: atomic32_sub_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 4), !dbg
 
 define void @atomic32_and_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw and i32* %a, i32 0 acq_rel
-  ret void
+  atomicrmw and i32* %a, i32 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_and_acq_rel
-; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 4)
+; CHECK-LABEL: atomic32_and_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 4), !dbg
 
 define void @atomic32_or_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw or i32* %a, i32 0 acq_rel
-  ret void
+  atomicrmw or i32* %a, i32 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_or_acq_rel
-; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 4)
+; CHECK-LABEL: atomic32_or_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 4), !dbg
 
 define void @atomic32_xor_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i32* %a, i32 0 acq_rel
-  ret void
+  atomicrmw xor i32* %a, i32 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xor_acq_rel
-; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 4)
+; CHECK-LABEL: atomic32_xor_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 4), !dbg
 
 define void @atomic32_nand_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i32* %a, i32 0 acq_rel
-  ret void
+  atomicrmw nand i32* %a, i32 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_nand_acq_rel
-; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 4)
+; CHECK-LABEL: atomic32_nand_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 4), !dbg
 
 define void @atomic32_xchg_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i32* %a, i32 0 seq_cst
-  ret void
+  atomicrmw xchg i32* %a, i32 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xchg_seq_cst
-; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_xchg_seq_cst
+; CHECK: call i32 @__tsan_atomic32_exchange(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_add_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw add i32* %a, i32 0 seq_cst
-  ret void
+  atomicrmw add i32* %a, i32 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_add_seq_cst
-; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_add_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_add(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_sub_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i32* %a, i32 0 seq_cst
-  ret void
+  atomicrmw sub i32* %a, i32 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_sub_seq_cst
-; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_sub_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_sub(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_and_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw and i32* %a, i32 0 seq_cst
-  ret void
+  atomicrmw and i32* %a, i32 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_and_seq_cst
-; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_and_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_and(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_or_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw or i32* %a, i32 0 seq_cst
-  ret void
+  atomicrmw or i32* %a, i32 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_or_seq_cst
-; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_or_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_or(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_xor_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i32* %a, i32 0 seq_cst
-  ret void
+  atomicrmw xor i32* %a, i32 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_xor_seq_cst
-; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_xor_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_nand_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i32* %a, i32 0 seq_cst
-  ret void
+  atomicrmw nand i32* %a, i32 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_nand_seq_cst
-; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 5)
+; CHECK-LABEL: atomic32_nand_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 5), !dbg
 
 define void @atomic32_cas_monotonic(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 monotonic monotonic
-  ret void
+  cmpxchg i32* %a, i32 0, i32 1 monotonic monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_cas_monotonic
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 0, i32 0)
+; CHECK-LABEL: atomic32_cas_monotonic
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 0, i32 0), !dbg
 
 define void @atomic32_cas_acquire(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 acquire acquire
-  ret void
+  cmpxchg i32* %a, i32 0, i32 1 acquire acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_cas_acquire
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 2, i32 2)
+; CHECK-LABEL: atomic32_cas_acquire
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 2, i32 2), !dbg
 
 define void @atomic32_cas_release(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 release monotonic
-  ret void
+  cmpxchg i32* %a, i32 0, i32 1 release monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_cas_release
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 3, i32 0)
+; CHECK-LABEL: atomic32_cas_release
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 3, i32 0), !dbg
 
 define void @atomic32_cas_acq_rel(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 acq_rel acquire
-  ret void
+  cmpxchg i32* %a, i32 0, i32 1 acq_rel acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_cas_acq_rel
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 4, i32 2)
+; CHECK-LABEL: atomic32_cas_acq_rel
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 4, i32 2), !dbg
 
 define void @atomic32_cas_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  cmpxchg i32* %a, i32 0, i32 1 seq_cst seq_cst
-  ret void
+  cmpxchg i32* %a, i32 0, i32 1 seq_cst seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic32_cas_seq_cst
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 5, i32 5)
+; CHECK-LABEL: atomic32_cas_seq_cst
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 5, i32 5), !dbg
 
 define i64 @atomic64_load_unordered(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64, i64* %a unordered, align 8
-  ret i64 %0
+  %0 = load atomic i64, i64* %a unordered, align 8, !dbg !7
+  ret i64 %0, !dbg !7
 }
-; CHECK: atomic64_load_unordered
-; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 0)
+; CHECK-LABEL: atomic64_load_unordered
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 0), !dbg
 
 define i64 @atomic64_load_monotonic(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64, i64* %a monotonic, align 8
-  ret i64 %0
+  %0 = load atomic i64, i64* %a monotonic, align 8, !dbg !7
+  ret i64 %0, !dbg !7
 }
-; CHECK: atomic64_load_monotonic
-; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 0)
+; CHECK-LABEL: atomic64_load_monotonic
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 0), !dbg
 
 define i64 @atomic64_load_acquire(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64, i64* %a acquire, align 8
-  ret i64 %0
+  %0 = load atomic i64, i64* %a acquire, align 8, !dbg !7
+  ret i64 %0, !dbg !7
 }
-; CHECK: atomic64_load_acquire
-; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 2)
+; CHECK-LABEL: atomic64_load_acquire
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 2), !dbg
 
 define i64 @atomic64_load_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64, i64* %a seq_cst, align 8
-  ret i64 %0
+  %0 = load atomic i64, i64* %a seq_cst, align 8, !dbg !7
+  ret i64 %0, !dbg !7
 }
-; CHECK: atomic64_load_seq_cst
-; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 5)
+; CHECK-LABEL: atomic64_load_seq_cst
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 5), !dbg
 
 define void @atomic64_store_unordered(i64* %a) nounwind uwtable {
 entry:
-  store atomic i64 0, i64* %a unordered, align 8
-  ret void
+  store atomic i64 0, i64* %a unordered, align 8, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_store_unordered
-; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_store_unordered
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_store_monotonic(i64* %a) nounwind uwtable {
 entry:
-  store atomic i64 0, i64* %a monotonic, align 8
-  ret void
+  store atomic i64 0, i64* %a monotonic, align 8, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_store_monotonic
-; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_store_monotonic
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_store_release(i64* %a) nounwind uwtable {
 entry:
-  store atomic i64 0, i64* %a release, align 8
-  ret void
+  store atomic i64 0, i64* %a release, align 8, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_store_release
-; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_store_release
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_store_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  store atomic i64 0, i64* %a seq_cst, align 8
-  ret void
+  store atomic i64 0, i64* %a seq_cst, align 8, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_store_seq_cst
-; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_store_seq_cst
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_xchg_monotonic(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i64* %a, i64 0 monotonic
-  ret void
+  atomicrmw xchg i64* %a, i64 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xchg_monotonic
-; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_xchg_monotonic
+; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_add_monotonic(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw add i64* %a, i64 0 monotonic
-  ret void
+  atomicrmw add i64* %a, i64 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_add_monotonic
-; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_add_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_sub_monotonic(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i64* %a, i64 0 monotonic
-  ret void
+  atomicrmw sub i64* %a, i64 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_sub_monotonic
-; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_sub_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_and_monotonic(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw and i64* %a, i64 0 monotonic
-  ret void
+  atomicrmw and i64* %a, i64 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_and_monotonic
-; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_and_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_or_monotonic(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw or i64* %a, i64 0 monotonic
-  ret void
+  atomicrmw or i64* %a, i64 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_or_monotonic
-; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_or_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_xor_monotonic(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i64* %a, i64 0 monotonic
-  ret void
+  atomicrmw xor i64* %a, i64 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xor_monotonic
-; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_xor_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_nand_monotonic(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i64* %a, i64 0 monotonic
-  ret void
+  atomicrmw nand i64* %a, i64 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_nand_monotonic
-; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 0)
+; CHECK-LABEL: atomic64_nand_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 0), !dbg
 
 define void @atomic64_xchg_acquire(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i64* %a, i64 0 acquire
-  ret void
+  atomicrmw xchg i64* %a, i64 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xchg_acquire
-; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 2)
+; CHECK-LABEL: atomic64_xchg_acquire
+; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 2), !dbg
 
 define void @atomic64_add_acquire(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw add i64* %a, i64 0 acquire
-  ret void
+  atomicrmw add i64* %a, i64 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_add_acquire
-; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 2)
+; CHECK-LABEL: atomic64_add_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 2), !dbg
 
 define void @atomic64_sub_acquire(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i64* %a, i64 0 acquire
-  ret void
+  atomicrmw sub i64* %a, i64 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_sub_acquire
-; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 2)
+; CHECK-LABEL: atomic64_sub_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 2), !dbg
 
 define void @atomic64_and_acquire(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw and i64* %a, i64 0 acquire
-  ret void
+  atomicrmw and i64* %a, i64 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_and_acquire
-; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 2)
+; CHECK-LABEL: atomic64_and_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 2), !dbg
 
 define void @atomic64_or_acquire(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw or i64* %a, i64 0 acquire
-  ret void
+  atomicrmw or i64* %a, i64 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_or_acquire
-; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 2)
+; CHECK-LABEL: atomic64_or_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 2), !dbg
 
 define void @atomic64_xor_acquire(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i64* %a, i64 0 acquire
-  ret void
+  atomicrmw xor i64* %a, i64 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xor_acquire
-; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 2)
+; CHECK-LABEL: atomic64_xor_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 2), !dbg
 
 define void @atomic64_nand_acquire(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i64* %a, i64 0 acquire
-  ret void
+  atomicrmw nand i64* %a, i64 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_nand_acquire
-; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 2)
+; CHECK-LABEL: atomic64_nand_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 2), !dbg
 
 define void @atomic64_xchg_release(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i64* %a, i64 0 release
-  ret void
+  atomicrmw xchg i64* %a, i64 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xchg_release
-; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_xchg_release
+; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_add_release(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw add i64* %a, i64 0 release
-  ret void
+  atomicrmw add i64* %a, i64 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_add_release
-; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_add_release
+; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_sub_release(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i64* %a, i64 0 release
-  ret void
+  atomicrmw sub i64* %a, i64 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_sub_release
-; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_sub_release
+; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_and_release(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw and i64* %a, i64 0 release
-  ret void
+  atomicrmw and i64* %a, i64 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_and_release
-; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_and_release
+; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_or_release(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw or i64* %a, i64 0 release
-  ret void
+  atomicrmw or i64* %a, i64 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_or_release
-; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_or_release
+; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_xor_release(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i64* %a, i64 0 release
-  ret void
+  atomicrmw xor i64* %a, i64 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xor_release
-; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_xor_release
+; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_nand_release(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i64* %a, i64 0 release
-  ret void
+  atomicrmw nand i64* %a, i64 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_nand_release
-; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 3)
+; CHECK-LABEL: atomic64_nand_release
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 3), !dbg
 
 define void @atomic64_xchg_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i64* %a, i64 0 acq_rel
-  ret void
+  atomicrmw xchg i64* %a, i64 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xchg_acq_rel
-; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 4)
+; CHECK-LABEL: atomic64_xchg_acq_rel
+; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 4), !dbg
 
 define void @atomic64_add_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw add i64* %a, i64 0 acq_rel
-  ret void
+  atomicrmw add i64* %a, i64 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_add_acq_rel
-; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 4)
+; CHECK-LABEL: atomic64_add_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 4), !dbg
 
 define void @atomic64_sub_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i64* %a, i64 0 acq_rel
-  ret void
+  atomicrmw sub i64* %a, i64 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_sub_acq_rel
-; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 4)
+; CHECK-LABEL: atomic64_sub_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 4), !dbg
 
 define void @atomic64_and_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw and i64* %a, i64 0 acq_rel
-  ret void
+  atomicrmw and i64* %a, i64 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_and_acq_rel
-; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 4)
+; CHECK-LABEL: atomic64_and_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 4), !dbg
 
 define void @atomic64_or_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw or i64* %a, i64 0 acq_rel
-  ret void
+  atomicrmw or i64* %a, i64 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_or_acq_rel
-; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 4)
+; CHECK-LABEL: atomic64_or_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 4), !dbg
 
 define void @atomic64_xor_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i64* %a, i64 0 acq_rel
-  ret void
+  atomicrmw xor i64* %a, i64 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xor_acq_rel
-; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 4)
+; CHECK-LABEL: atomic64_xor_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 4), !dbg
 
 define void @atomic64_nand_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i64* %a, i64 0 acq_rel
-  ret void
+  atomicrmw nand i64* %a, i64 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_nand_acq_rel
-; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 4)
+; CHECK-LABEL: atomic64_nand_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 4), !dbg
 
 define void @atomic64_xchg_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i64* %a, i64 0 seq_cst
-  ret void
+  atomicrmw xchg i64* %a, i64 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xchg_seq_cst
-; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_xchg_seq_cst
+; CHECK: call i64 @__tsan_atomic64_exchange(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_add_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw add i64* %a, i64 0 seq_cst
-  ret void
+  atomicrmw add i64* %a, i64 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_add_seq_cst
-; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_add_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_add(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_sub_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i64* %a, i64 0 seq_cst
-  ret void
+  atomicrmw sub i64* %a, i64 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_sub_seq_cst
-; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_sub_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_sub(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_and_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw and i64* %a, i64 0 seq_cst
-  ret void
+  atomicrmw and i64* %a, i64 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_and_seq_cst
-; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_and_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_and(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_or_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw or i64* %a, i64 0 seq_cst
-  ret void
+  atomicrmw or i64* %a, i64 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_or_seq_cst
-; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_or_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_or(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_xor_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i64* %a, i64 0 seq_cst
-  ret void
+  atomicrmw xor i64* %a, i64 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_xor_seq_cst
-; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_xor_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_nand_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i64* %a, i64 0 seq_cst
-  ret void
+  atomicrmw nand i64* %a, i64 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_nand_seq_cst
-; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 5)
+; CHECK-LABEL: atomic64_nand_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 5), !dbg
 
 define void @atomic64_cas_monotonic(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 monotonic monotonic
-  ret void
+  cmpxchg i64* %a, i64 0, i64 1 monotonic monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_cas_monotonic
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 0, i32 0)
+; CHECK-LABEL: atomic64_cas_monotonic
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 0, i32 0), !dbg
 
 define void @atomic64_cas_acquire(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 acquire acquire
-  ret void
+  cmpxchg i64* %a, i64 0, i64 1 acquire acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_cas_acquire
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 2, i32 2)
+; CHECK-LABEL: atomic64_cas_acquire
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 2, i32 2), !dbg
 
 define void @atomic64_cas_release(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 release monotonic
-  ret void
+  cmpxchg i64* %a, i64 0, i64 1 release monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_cas_release
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 3, i32 0)
+; CHECK-LABEL: atomic64_cas_release
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 3, i32 0), !dbg
 
 define void @atomic64_cas_acq_rel(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 acq_rel acquire
-  ret void
+  cmpxchg i64* %a, i64 0, i64 1 acq_rel acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_cas_acq_rel
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 4, i32 2)
+; CHECK-LABEL: atomic64_cas_acq_rel
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 4, i32 2), !dbg
 
 define void @atomic64_cas_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  cmpxchg i64* %a, i64 0, i64 1 seq_cst seq_cst
-  ret void
+  cmpxchg i64* %a, i64 0, i64 1 seq_cst seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic64_cas_seq_cst
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 5, i32 5)
+; CHECK-LABEL: atomic64_cas_seq_cst
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 5, i32 5), !dbg
 
 define i128 @atomic128_load_unordered(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128, i128* %a unordered, align 16
-  ret i128 %0
+  %0 = load atomic i128, i128* %a unordered, align 16, !dbg !7
+  ret i128 %0, !dbg !7
 }
-; CHECK: atomic128_load_unordered
-; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 0)
+; CHECK-LABEL: atomic128_load_unordered
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 0), !dbg
 
 define i128 @atomic128_load_monotonic(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128, i128* %a monotonic, align 16
-  ret i128 %0
+  %0 = load atomic i128, i128* %a monotonic, align 16, !dbg !7
+  ret i128 %0, !dbg !7
 }
-; CHECK: atomic128_load_monotonic
-; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 0)
+; CHECK-LABEL: atomic128_load_monotonic
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 0), !dbg
 
 define i128 @atomic128_load_acquire(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128, i128* %a acquire, align 16
-  ret i128 %0
+  %0 = load atomic i128, i128* %a acquire, align 16, !dbg !7
+  ret i128 %0, !dbg !7
 }
-; CHECK: atomic128_load_acquire
-; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 2)
+; CHECK-LABEL: atomic128_load_acquire
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 2), !dbg
 
 define i128 @atomic128_load_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128, i128* %a seq_cst, align 16
-  ret i128 %0
+  %0 = load atomic i128, i128* %a seq_cst, align 16, !dbg !7
+  ret i128 %0, !dbg !7
 }
-; CHECK: atomic128_load_seq_cst
-; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 5)
+; CHECK-LABEL: atomic128_load_seq_cst
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 5), !dbg
 
 define void @atomic128_store_unordered(i128* %a) nounwind uwtable {
 entry:
-  store atomic i128 0, i128* %a unordered, align 16
-  ret void
+  store atomic i128 0, i128* %a unordered, align 16, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_store_unordered
-; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_store_unordered
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_store_monotonic(i128* %a) nounwind uwtable {
 entry:
-  store atomic i128 0, i128* %a monotonic, align 16
-  ret void
+  store atomic i128 0, i128* %a monotonic, align 16, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_store_monotonic
-; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_store_monotonic
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_store_release(i128* %a) nounwind uwtable {
 entry:
-  store atomic i128 0, i128* %a release, align 16
-  ret void
+  store atomic i128 0, i128* %a release, align 16, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_store_release
-; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_store_release
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_store_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  store atomic i128 0, i128* %a seq_cst, align 16
-  ret void
+  store atomic i128 0, i128* %a seq_cst, align 16, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_store_seq_cst
-; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_store_seq_cst
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_xchg_monotonic(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i128* %a, i128 0 monotonic
-  ret void
+  atomicrmw xchg i128* %a, i128 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xchg_monotonic
-; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_xchg_monotonic
+; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_add_monotonic(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw add i128* %a, i128 0 monotonic
-  ret void
+  atomicrmw add i128* %a, i128 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_add_monotonic
-; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_add_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_sub_monotonic(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i128* %a, i128 0 monotonic
-  ret void
+  atomicrmw sub i128* %a, i128 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_sub_monotonic
-; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_sub_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_and_monotonic(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw and i128* %a, i128 0 monotonic
-  ret void
+  atomicrmw and i128* %a, i128 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_and_monotonic
-; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_and_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_or_monotonic(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw or i128* %a, i128 0 monotonic
-  ret void
+  atomicrmw or i128* %a, i128 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_or_monotonic
-; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_or_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_xor_monotonic(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i128* %a, i128 0 monotonic
-  ret void
+  atomicrmw xor i128* %a, i128 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xor_monotonic
-; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_xor_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_nand_monotonic(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i128* %a, i128 0 monotonic
-  ret void
+  atomicrmw nand i128* %a, i128 0 monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_nand_monotonic
-; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 0)
+; CHECK-LABEL: atomic128_nand_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 0), !dbg
 
 define void @atomic128_xchg_acquire(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i128* %a, i128 0 acquire
-  ret void
+  atomicrmw xchg i128* %a, i128 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xchg_acquire
-; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 2)
+; CHECK-LABEL: atomic128_xchg_acquire
+; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 2), !dbg
 
 define void @atomic128_add_acquire(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw add i128* %a, i128 0 acquire
-  ret void
+  atomicrmw add i128* %a, i128 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_add_acquire
-; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 2)
+; CHECK-LABEL: atomic128_add_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 2), !dbg
 
 define void @atomic128_sub_acquire(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i128* %a, i128 0 acquire
-  ret void
+  atomicrmw sub i128* %a, i128 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_sub_acquire
-; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 2)
+; CHECK-LABEL: atomic128_sub_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 2), !dbg
 
 define void @atomic128_and_acquire(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw and i128* %a, i128 0 acquire
-  ret void
+  atomicrmw and i128* %a, i128 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_and_acquire
-; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 2)
+; CHECK-LABEL: atomic128_and_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 2), !dbg
 
 define void @atomic128_or_acquire(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw or i128* %a, i128 0 acquire
-  ret void
+  atomicrmw or i128* %a, i128 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_or_acquire
-; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 2)
+; CHECK-LABEL: atomic128_or_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 2), !dbg
 
 define void @atomic128_xor_acquire(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i128* %a, i128 0 acquire
-  ret void
+  atomicrmw xor i128* %a, i128 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xor_acquire
-; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 2)
+; CHECK-LABEL: atomic128_xor_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 2), !dbg
 
 define void @atomic128_nand_acquire(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i128* %a, i128 0 acquire
-  ret void
+  atomicrmw nand i128* %a, i128 0 acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_nand_acquire
-; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 2)
+; CHECK-LABEL: atomic128_nand_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 2), !dbg
 
 define void @atomic128_xchg_release(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i128* %a, i128 0 release
-  ret void
+  atomicrmw xchg i128* %a, i128 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xchg_release
-; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_xchg_release
+; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_add_release(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw add i128* %a, i128 0 release
-  ret void
+  atomicrmw add i128* %a, i128 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_add_release
-; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_add_release
+; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_sub_release(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i128* %a, i128 0 release
-  ret void
+  atomicrmw sub i128* %a, i128 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_sub_release
-; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_sub_release
+; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_and_release(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw and i128* %a, i128 0 release
-  ret void
+  atomicrmw and i128* %a, i128 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_and_release
-; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_and_release
+; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_or_release(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw or i128* %a, i128 0 release
-  ret void
+  atomicrmw or i128* %a, i128 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_or_release
-; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_or_release
+; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_xor_release(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i128* %a, i128 0 release
-  ret void
+  atomicrmw xor i128* %a, i128 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xor_release
-; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_xor_release
+; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_nand_release(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i128* %a, i128 0 release
-  ret void
+  atomicrmw nand i128* %a, i128 0 release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_nand_release
-; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 3)
+; CHECK-LABEL: atomic128_nand_release
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 3), !dbg
 
 define void @atomic128_xchg_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i128* %a, i128 0 acq_rel
-  ret void
+  atomicrmw xchg i128* %a, i128 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xchg_acq_rel
-; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 4)
+; CHECK-LABEL: atomic128_xchg_acq_rel
+; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 4), !dbg
 
 define void @atomic128_add_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw add i128* %a, i128 0 acq_rel
-  ret void
+  atomicrmw add i128* %a, i128 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_add_acq_rel
-; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 4)
+; CHECK-LABEL: atomic128_add_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 4), !dbg
 
 define void @atomic128_sub_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i128* %a, i128 0 acq_rel
-  ret void
+  atomicrmw sub i128* %a, i128 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_sub_acq_rel
-; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 4)
+; CHECK-LABEL: atomic128_sub_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 4), !dbg
 
 define void @atomic128_and_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw and i128* %a, i128 0 acq_rel
-  ret void
+  atomicrmw and i128* %a, i128 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_and_acq_rel
-; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 4)
+; CHECK-LABEL: atomic128_and_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 4), !dbg
 
 define void @atomic128_or_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw or i128* %a, i128 0 acq_rel
-  ret void
+  atomicrmw or i128* %a, i128 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_or_acq_rel
-; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 4)
+; CHECK-LABEL: atomic128_or_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 4), !dbg
 
 define void @atomic128_xor_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i128* %a, i128 0 acq_rel
-  ret void
+  atomicrmw xor i128* %a, i128 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xor_acq_rel
-; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 4)
+; CHECK-LABEL: atomic128_xor_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 4), !dbg
 
 define void @atomic128_nand_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i128* %a, i128 0 acq_rel
-  ret void
+  atomicrmw nand i128* %a, i128 0 acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_nand_acq_rel
-; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 4)
+; CHECK-LABEL: atomic128_nand_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 4), !dbg
 
 define void @atomic128_xchg_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xchg i128* %a, i128 0 seq_cst
-  ret void
+  atomicrmw xchg i128* %a, i128 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xchg_seq_cst
-; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_xchg_seq_cst
+; CHECK: call i128 @__tsan_atomic128_exchange(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_add_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw add i128* %a, i128 0 seq_cst
-  ret void
+  atomicrmw add i128* %a, i128 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_add_seq_cst
-; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_add_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_add(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_sub_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw sub i128* %a, i128 0 seq_cst
-  ret void
+  atomicrmw sub i128* %a, i128 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_sub_seq_cst
-; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_sub_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_sub(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_and_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw and i128* %a, i128 0 seq_cst
-  ret void
+  atomicrmw and i128* %a, i128 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_and_seq_cst
-; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_and_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_and(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_or_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw or i128* %a, i128 0 seq_cst
-  ret void
+  atomicrmw or i128* %a, i128 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_or_seq_cst
-; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_or_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_or(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_xor_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw xor i128* %a, i128 0 seq_cst
-  ret void
+  atomicrmw xor i128* %a, i128 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_xor_seq_cst
-; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_xor_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_nand_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  atomicrmw nand i128* %a, i128 0 seq_cst
-  ret void
+  atomicrmw nand i128* %a, i128 0 seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_nand_seq_cst
-; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 5)
+; CHECK-LABEL: atomic128_nand_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 5), !dbg
 
 define void @atomic128_cas_monotonic(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 monotonic monotonic
-  ret void
+  cmpxchg i128* %a, i128 0, i128 1 monotonic monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_cas_monotonic
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 0, i32 0)
+; CHECK-LABEL: atomic128_cas_monotonic
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 0, i32 0), !dbg
 
 define void @atomic128_cas_acquire(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 acquire acquire
-  ret void
+  cmpxchg i128* %a, i128 0, i128 1 acquire acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_cas_acquire
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 2, i32 2)
+; CHECK-LABEL: atomic128_cas_acquire
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 2, i32 2), !dbg
 
 define void @atomic128_cas_release(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 release monotonic
-  ret void
+  cmpxchg i128* %a, i128 0, i128 1 release monotonic, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_cas_release
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 3, i32 0)
+; CHECK-LABEL: atomic128_cas_release
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 3, i32 0), !dbg
 
 define void @atomic128_cas_acq_rel(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 acq_rel acquire
-  ret void
+  cmpxchg i128* %a, i128 0, i128 1 acq_rel acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_cas_acq_rel
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 4, i32 2)
+; CHECK-LABEL: atomic128_cas_acq_rel
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 4, i32 2), !dbg
 
 define void @atomic128_cas_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  cmpxchg i128* %a, i128 0, i128 1 seq_cst seq_cst
-  ret void
+  cmpxchg i128* %a, i128 0, i128 1 seq_cst seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic128_cas_seq_cst
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 5, i32 5)
+; CHECK-LABEL: atomic128_cas_seq_cst
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 5, i32 5), !dbg
 
 define void @atomic_signal_fence_acquire() nounwind uwtable {
 entry:
-  fence singlethread acquire
-  ret void
+  fence singlethread acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_signal_fence_acquire
-; CHECK: call void @__tsan_atomic_signal_fence(i32 2)
+; CHECK-LABEL: atomic_signal_fence_acquire
+; CHECK: call void @__tsan_atomic_signal_fence(i32 2), !dbg
 
 define void @atomic_thread_fence_acquire() nounwind uwtable {
 entry:
-  fence  acquire
-  ret void
+  fence  acquire, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_thread_fence_acquire
-; CHECK: call void @__tsan_atomic_thread_fence(i32 2)
+; CHECK-LABEL: atomic_thread_fence_acquire
+; CHECK: call void @__tsan_atomic_thread_fence(i32 2), !dbg
 
 define void @atomic_signal_fence_release() nounwind uwtable {
 entry:
-  fence singlethread release
-  ret void
+  fence singlethread release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_signal_fence_release
-; CHECK: call void @__tsan_atomic_signal_fence(i32 3)
+; CHECK-LABEL: atomic_signal_fence_release
+; CHECK: call void @__tsan_atomic_signal_fence(i32 3), !dbg
 
 define void @atomic_thread_fence_release() nounwind uwtable {
 entry:
-  fence  release
-  ret void
+  fence  release, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_thread_fence_release
-; CHECK: call void @__tsan_atomic_thread_fence(i32 3)
+; CHECK-LABEL: atomic_thread_fence_release
+; CHECK: call void @__tsan_atomic_thread_fence(i32 3), !dbg
 
 define void @atomic_signal_fence_acq_rel() nounwind uwtable {
 entry:
-  fence singlethread acq_rel
-  ret void
+  fence singlethread acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_signal_fence_acq_rel
-; CHECK: call void @__tsan_atomic_signal_fence(i32 4)
+; CHECK-LABEL: atomic_signal_fence_acq_rel
+; CHECK: call void @__tsan_atomic_signal_fence(i32 4), !dbg
 
 define void @atomic_thread_fence_acq_rel() nounwind uwtable {
 entry:
-  fence  acq_rel
-  ret void
+  fence  acq_rel, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_thread_fence_acq_rel
-; CHECK: call void @__tsan_atomic_thread_fence(i32 4)
+; CHECK-LABEL: atomic_thread_fence_acq_rel
+; CHECK: call void @__tsan_atomic_thread_fence(i32 4), !dbg
 
 define void @atomic_signal_fence_seq_cst() nounwind uwtable {
 entry:
-  fence singlethread seq_cst
-  ret void
+  fence singlethread seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_signal_fence_seq_cst
-; CHECK: call void @__tsan_atomic_signal_fence(i32 5)
+; CHECK-LABEL: atomic_signal_fence_seq_cst
+; CHECK: call void @__tsan_atomic_signal_fence(i32 5), !dbg
 
 define void @atomic_thread_fence_seq_cst() nounwind uwtable {
 entry:
-  fence  seq_cst
-  ret void
+  fence  seq_cst, !dbg !7
+  ret void, !dbg !7
 }
-; CHECK: atomic_thread_fence_seq_cst
-; CHECK: call void @__tsan_atomic_thread_fence(i32 5)
+; CHECK-LABEL: atomic_thread_fence_seq_cst
+; CHECK: call void @__tsan_atomic_thread_fence(i32 5), !dbg
+
+!llvm.module.flags = !{!0, !1, !2}
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !{i32 1, !"PIC Level", i32 2}
+
+!3 = !{}
+!4 = !DISubroutineType(types: !3)
+!5 = !DIFile(filename: "atomic.cpp", directory: "/tmp")
+!6 = !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
+!7 = !DILocation(line: 100, column: 1, scope: !6)
diff --git a/test/Linker/comdat10.ll b/test/Linker/comdat10.ll
new file mode 100644
index 0000000..8a32c42
--- /dev/null
+++ b/test/Linker/comdat10.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-link %s /dev/null -S -o - | FileCheck %s
+
+$c = comdat largest
+
+; CHECK: @c = global i32 0, comdat
+@c = global i32 0, comdat
diff --git a/test/MC/AArch64/alias-addsubimm.s b/test/MC/AArch64/alias-addsubimm.s
new file mode 100644
index 0000000..75e0a18
--- /dev/null
+++ b/test/MC/AArch64/alias-addsubimm.s
@@ -0,0 +1,94 @@
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+// CHECK: sub w0, w2, #2, lsl #12
+// CHECK: sub w0, w2, #2, lsl #12
+        sub w0, w2, #2, lsl 12
+        add w0, w2, #-2, lsl 12
+// CHECK: sub x1, x3, #2, lsl #12
+// CHECK: sub x1, x3, #2, lsl #12
+        sub x1, x3, #2, lsl 12
+        add x1, x3, #-2, lsl 12
+// CHECK: sub x1, x3, #4
+// CHECK: sub x1, x3, #4
+        sub x1, x3, #4
+        add x1, x3, #-4
+// CHECK: sub x1, x3, #4095
+// CHECK: sub x1, x3, #4095
+        sub x1, x3, #4095, lsl 0
+        add x1, x3, #-4095, lsl 0
+// CHECK: sub x3, x4, #0
+        sub x3, x4, #0
+
+// CHECK: add w0, w2, #2, lsl #12
+// CHECK: add w0, w2, #2, lsl #12
+        add w0, w2, #2, lsl 12
+        sub w0, w2, #-2, lsl 12
+// CHECK: add x1, x3, #2, lsl #12
+// CHECK: add x1, x3, #2, lsl #12
+        add x1, x3, #2, lsl 12
+        sub x1, x3, #-2, lsl 12
+// CHECK: add x1, x3, #4
+// CHECK: add x1, x3, #4
+        add x1, x3, #4
+        sub x1, x3, #-4
+// CHECK: add x1, x3, #4095
+// CHECK: add x1, x3, #4095
+        add x1, x3, #4095, lsl 0
+        sub x1, x3, #-4095, lsl 0
+// CHECK: add x2, x5, #0
+        add x2, x5, #0
+
+// CHECK: subs w0, w2, #2, lsl #12
+// CHECK: subs w0, w2, #2, lsl #12
+        subs w0, w2, #2, lsl 12
+        adds w0, w2, #-2, lsl 12
+// CHECK: subs x1, x3, #2, lsl #12
+// CHECK: subs x1, x3, #2, lsl #12
+        subs x1, x3, #2, lsl 12
+        adds x1, x3, #-2, lsl 12
+// CHECK: subs x1, x3, #4
+// CHECK: subs x1, x3, #4
+        subs x1, x3, #4
+        adds x1, x3, #-4
+// CHECK: subs x1, x3, #4095
+// CHECK: subs x1, x3, #4095
+        subs x1, x3, #4095, lsl 0
+        adds x1, x3, #-4095, lsl 0
+// CHECK: subs x3, x4, #0
+        subs x3, x4, #0
+
+// CHECK: adds w0, w2, #2, lsl #12
+// CHECK: adds w0, w2, #2, lsl #12
+        adds w0, w2, #2, lsl 12
+        subs w0, w2, #-2, lsl 12
+// CHECK: adds x1, x3, #2, lsl #12
+// CHECK: adds x1, x3, #2, lsl #12
+        adds x1, x3, #2, lsl 12
+        subs x1, x3, #-2, lsl 12
+// CHECK: adds x1, x3, #4
+// CHECK: adds x1, x3, #4
+        adds x1, x3, #4
+        subs x1, x3, #-4
+// CHECK: adds x1, x3, #4095
+// CHECK: adds x1, x3, #4095
+        adds x1, x3, #4095, lsl 0
+        subs x1, x3, #-4095, lsl 0
+// CHECK: adds x2, x5, #0
+        adds x2, x5, #0
+
+// CHECK: {{adds xzr,|cmn}} x5, #5
+// CHECK: {{adds xzr,|cmn}} x5, #5
+        cmn x5, #5
+        cmp x5, #-5
+// CHECK: {{subs xzr,|cmp}} x6, #4095
+// CHECK: {{subs xzr,|cmp}} x6, #4095
+        cmp x6, #4095
+        cmn x6, #-4095
+// CHECK: {{adds wzr,|cmn}} w7, #5
+// CHECK: {{adds wzr,|cmn}} w7, #5
+        cmn w7, #5
+        cmp w7, #-5
+// CHECK: {{subs wzr,|cmp}} w8, #4095
+// CHECK: {{subs wzr,|cmp}} w8, #4095
+        cmp w8, #4095
+        cmn w8, #-4095
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index bf7db13..0c2bc68 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -75,19 +75,19 @@
 // Add/sub (immediate)
 //------------------------------------------------------------------------------
 
-// Out of range immediates: < 0 or more than 12 bits
-        add w4, w5, #-1
+// Out of range immediates: more than 12 bits
+        add w4, w5, #-4096
         add w5, w6, #0x1000
-        add w4, w5, #-1, lsl #12
+        add w4, w5, #-4096, lsl #12
         add w5, w6, #0x1000, lsl #12
 // CHECK-ERROR: error: expected compatible register, symbol or integer in range [0, 4095]
-// CHECK-ERROR-NEXT:         add w4, w5, #-1
+// CHECK-ERROR-NEXT:         add w4, w5, #-4096
 // CHECK-ERROR-NEXT:                     ^
 // CHECK-ERROR-AARCH64-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
 // CHECK-ERROR-AARCH64-NEXT:         add w5, w6, #0x1000
 // CHECK-ERROR-AARCH64-NEXT:                     ^
 // CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
-// CHECK-ERROR-NEXT:         add w4, w5, #-1, lsl #12
+// CHECK-ERROR-NEXT:         add w4, w5, #-4096, lsl #12
 // CHECK-ERROR-NEXT:                     ^
 // CHECK-ERROR-NEXT: error: expected compatible register, symbol or integer in range [0, 4095]
 // CHECK-ERROR-NEXT:         add w5, w6, #0x1000, lsl #12
diff --git a/test/MC/AMDGPU/hsa.s b/test/MC/AMDGPU/hsa.s
new file mode 100644
index 0000000..7dfea0f
--- /dev/null
+++ b/test/MC/AMDGPU/hsa.s
@@ -0,0 +1,233 @@
+// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF
+
+// ELF: SHT_NOTE
+// ELF: 0000: 04000000 08000000 01000000 414D4400
+// ELF: 0010: 01000000 00000000 04000000 1B000000
+// ELF: 0020: 03000000 414D4400 04000700 07000000
+// ELF: 0030: 00000000 00000000 414D4400 414D4447
+// ELF: 0040: 50550000
+
+.hsa_code_object_version 1,0
+// ASM: .hsa_code_object_version 1,0
+
+.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+
+.text
+amd_kernel_code_t_test_all:
+; Test all amd_kernel_code_t members with non-default values.
+.amd_kernel_code_t
+	kernel_code_version_major = 100
+	kernel_code_version_minor = 100
+	machine_kind = 0
+	machine_version_major = 5
+	machine_version_minor = 5
+	machine_version_stepping = 5
+	kernel_code_entry_byte_offset = 512
+	kernel_code_prefetch_byte_size = 1
+	max_scratch_backing_memory_byte_size = 1
+	compute_pgm_rsrc1_vgprs = 1
+	compute_pgm_rsrc1_sgprs = 1
+	compute_pgm_rsrc1_priority = 1
+	compute_pgm_rsrc1_float_mode = 1
+	compute_pgm_rsrc1_priv = 1
+	compute_pgm_rsrc1_dx10_clamp = 1
+	compute_pgm_rsrc1_debug_mode = 1
+	compute_pgm_rsrc1_ieee_mode = 1
+	compute_pgm_rsrc2_scratch_en = 1
+	compute_pgm_rsrc2_user_sgpr = 1
+	compute_pgm_rsrc2_tgid_x_en = 1
+	compute_pgm_rsrc2_tgid_y_en = 1
+	compute_pgm_rsrc2_tgid_z_en = 1
+	compute_pgm_rsrc2_tg_size_en = 1
+	compute_pgm_rsrc2_tidig_comp_cnt = 1
+	compute_pgm_rsrc2_excp_en_msb = 1
+	compute_pgm_rsrc2_lds_size = 1
+	compute_pgm_rsrc2_excp_en = 1
+	enable_sgpr_private_segment_buffer = 1
+	enable_sgpr_dispatch_ptr = 1
+	enable_sgpr_queue_ptr = 1
+	enable_sgpr_kernarg_segment_ptr = 1
+	enable_sgpr_dispatch_id = 1
+	enable_sgpr_flat_scratch_init = 1
+	enable_sgpr_private_segment_size = 1
+	enable_sgpr_grid_workgroup_count_x = 1
+	enable_sgpr_grid_workgroup_count_y = 1
+	enable_sgpr_grid_workgroup_count_z = 1
+	enable_ordered_append_gds = 1
+	private_element_size = 1
+	is_ptr64 = 1
+	is_dynamic_callstack = 1
+	is_debug_enabled = 1
+	is_xnack_enabled = 1
+	workitem_private_segment_byte_size = 1
+	workgroup_group_segment_byte_size = 1
+	gds_segment_byte_size = 1
+	kernarg_segment_byte_size = 1
+	workgroup_fbarrier_count = 1
+	wavefront_sgpr_count = 1
+	workitem_vgpr_count = 1
+	reserved_vgpr_first = 1
+	reserved_vgpr_count = 1
+	reserved_sgpr_first = 1
+	reserved_sgpr_count = 1
+	debug_wavefront_private_segment_offset_sgpr = 1
+	debug_private_segment_buffer_sgpr = 1
+	kernarg_segment_alignment = 5
+	group_segment_alignment = 5
+	private_segment_alignment = 5
+	wavefront_size = 5
+	call_convention = 1
+	runtime_loader_kernel_symbol = 1
+.end_amd_kernel_code_t
+
+// ASM-LABEL: {{^}}amd_kernel_code_t_test_all:
+// ASM: .amd_kernel_code_t
+// ASM: kernel_code_version_major = 100
+// ASM: kernel_code_version_minor = 100
+// ASM: machine_kind = 0
+// ASM: machine_version_major = 5
+// ASM: machine_version_minor = 5
+// ASM: machine_version_stepping = 5
+// ASM: kernel_code_entry_byte_offset = 512
+// ASM: kernel_code_prefetch_byte_size = 1
+// ASM: max_scratch_backing_memory_byte_size = 1
+// ASM: compute_pgm_rsrc1_vgprs = 1
+// ASM: compute_pgm_rsrc1_sgprs = 1
+// ASM: compute_pgm_rsrc1_priority = 1
+// ASM: compute_pgm_rsrc1_float_mode = 1 
+// ASM: compute_pgm_rsrc1_priv = 1
+// ASM: compute_pgm_rsrc1_dx10_clamp = 1
+// ASM: compute_pgm_rsrc1_debug_mode = 1 
+// ASM: compute_pgm_rsrc1_ieee_mode = 1
+// ASM: compute_pgm_rsrc2_scratch_en = 1
+// ASM: compute_pgm_rsrc2_user_sgpr = 1
+// ASM: compute_pgm_rsrc2_tgid_x_en = 1
+// ASM: compute_pgm_rsrc2_tgid_y_en = 1
+// ASM: compute_pgm_rsrc2_tgid_z_en = 1
+// ASM: compute_pgm_rsrc2_tg_size_en = 1
+// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 1
+// ASM: compute_pgm_rsrc2_excp_en_msb = 1
+// ASM: compute_pgm_rsrc2_lds_size = 1
+// ASM: compute_pgm_rsrc2_excp_en = 1
+// ASM: enable_sgpr_private_segment_buffer = 1
+// ASM: enable_sgpr_dispatch_ptr = 1
+// ASM: enable_sgpr_queue_ptr = 1
+// ASM: enable_sgpr_kernarg_segment_ptr = 1
+// ASM: enable_sgpr_dispatch_id = 1
+// ASM: enable_sgpr_flat_scratch_init = 1
+// ASM: enable_sgpr_private_segment_size = 1
+// ASM: enable_sgpr_grid_workgroup_count_x = 1
+// ASM: enable_sgpr_grid_workgroup_count_y = 1
+// ASM: enable_sgpr_grid_workgroup_count_z = 1
+// ASM: enable_ordered_append_gds = 1
+// ASM: private_element_size = 1
+// ASM: is_ptr64 = 1
+// ASM: is_dynamic_callstack = 1
+// ASM: is_debug_enabled = 1
+// ASM: is_xnack_enabled = 1
+// ASM: workitem_private_segment_byte_size = 1
+// ASM: workgroup_group_segment_byte_size = 1
+// ASM: gds_segment_byte_size = 1
+// ASM: kernarg_segment_byte_size = 1
+// ASM: workgroup_fbarrier_count = 1
+// ASM: wavefront_sgpr_count = 1
+// ASM: workitem_vgpr_count = 1
+// ASM: reserved_vgpr_first = 1
+// ASM: reserved_vgpr_count = 1
+// ASM: reserved_sgpr_first = 1
+// ASM: reserved_sgpr_count = 1
+// ASM: debug_wavefront_private_segment_offset_sgpr = 1
+// ASM: debug_private_segment_buffer_sgpr = 1
+// ASM: kernarg_segment_alignment = 5
+// ASM: group_segment_alignment = 5
+// ASM: private_segment_alignment = 5
+// ASM: wavefront_size = 5
+// ASM: call_convention = 1
+// ASM: runtime_loader_kernel_symbol = 1
+// ASM: .end_amd_kernel_code_t
+
+amd_kernel_code_t_minimal:
+.amd_kernel_code_t
+	enable_sgpr_kernarg_segment_ptr = 1
+	is_ptr64 = 1
+	compute_pgm_rsrc1_vgprs = 1
+	compute_pgm_rsrc1_sgprs = 1
+	compute_pgm_rsrc2_user_sgpr = 2
+	kernarg_segment_byte_size = 16
+	wavefront_sgpr_count = 8
+//      wavefront_sgpr_count = 7
+;	wavefront_sgpr_count = 7
+// Make sure a blank line won't break anything:
+
+// Make sure a line with whitespace won't break anything:
+   
+	workitem_vgpr_count = 16
+.end_amd_kernel_code_t
+
+// ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
+// ASM: .amd_kernel_code_t
+// ASM:	kernel_code_version_major = 1
+// ASM:	kernel_code_version_minor = 0
+// ASM:	machine_kind = 1
+// ASM:	machine_version_major = 7
+// ASM:	machine_version_minor = 0
+// ASM:	machine_version_stepping = 0
+// ASM:	kernel_code_entry_byte_offset = 256
+// ASM:	kernel_code_prefetch_byte_size = 0
+// ASM:	max_scratch_backing_memory_byte_size = 0
+// ASM:	compute_pgm_rsrc1_vgprs = 1
+// ASM:	compute_pgm_rsrc1_sgprs = 1
+// ASM:	compute_pgm_rsrc1_priority = 0
+// ASM:	compute_pgm_rsrc1_float_mode = 0
+// ASM:	compute_pgm_rsrc1_priv = 0
+// ASM:	compute_pgm_rsrc1_dx10_clamp = 0
+// ASM:	compute_pgm_rsrc1_debug_mode = 0
+// ASM:	compute_pgm_rsrc1_ieee_mode = 0
+// ASM:	compute_pgm_rsrc2_scratch_en = 0
+// ASM:	compute_pgm_rsrc2_user_sgpr = 2
+// ASM:	compute_pgm_rsrc2_tgid_x_en = 0
+// ASM:	compute_pgm_rsrc2_tgid_y_en = 0
+// ASM:	compute_pgm_rsrc2_tgid_z_en = 0
+// ASM:	compute_pgm_rsrc2_tg_size_en = 0
+// ASM:	compute_pgm_rsrc2_tidig_comp_cnt = 0
+// ASM:	compute_pgm_rsrc2_excp_en_msb = 0
+// ASM:	compute_pgm_rsrc2_lds_size = 0
+// ASM:	compute_pgm_rsrc2_excp_en = 0
+// ASM:	enable_sgpr_private_segment_buffer = 0
+// ASM:	enable_sgpr_dispatch_ptr = 0
+// ASM:	enable_sgpr_queue_ptr = 0
+// ASM:	enable_sgpr_kernarg_segment_ptr = 1
+// ASM:	enable_sgpr_dispatch_id = 0
+// ASM:	enable_sgpr_flat_scratch_init = 0
+// ASM:	enable_sgpr_private_segment_size = 0
+// ASM:	enable_sgpr_grid_workgroup_count_x = 0
+// ASM:	enable_sgpr_grid_workgroup_count_y = 0
+// ASM:	enable_sgpr_grid_workgroup_count_z = 0
+// ASM:	enable_ordered_append_gds = 0
+// ASM:	private_element_size = 0
+// ASM:	is_ptr64 = 1
+// ASM:	is_dynamic_callstack = 0
+// ASM:	is_debug_enabled = 0
+// ASM:	is_xnack_enabled = 0
+// ASM:	workitem_private_segment_byte_size = 0
+// ASM:	workgroup_group_segment_byte_size = 0
+// ASM:	gds_segment_byte_size = 0
+// ASM:	kernarg_segment_byte_size = 16
+// ASM:	workgroup_fbarrier_count = 0
+// ASM:	wavefront_sgpr_count = 8
+// ASM:	workitem_vgpr_count = 16
+// ASM:	reserved_vgpr_first = 0
+// ASM:	reserved_vgpr_count = 0
+// ASM:	reserved_sgpr_first = 0
+// ASM:	reserved_sgpr_count = 0
+// ASM:	debug_wavefront_private_segment_offset_sgpr = 0
+// ASM:	debug_private_segment_buffer_sgpr = 0
+// ASM:	kernarg_segment_alignment = 4
+// ASM:	group_segment_alignment = 4
+// ASM:	private_segment_alignment = 4
+// ASM:	wavefront_size = 6
+// ASM:	call_convention = 0
+// ASM:	runtime_loader_kernel_symbol = 0
+// ASM: .end_amd_kernel_code_t
diff --git a/test/MC/AMDGPU/hsa_code_object_isa_noargs.s b/test/MC/AMDGPU/hsa_code_object_isa_noargs.s
new file mode 100644
index 0000000..85f53bb
--- /dev/null
+++ b/test/MC/AMDGPU/hsa_code_object_isa_noargs.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF
+
+// ELF: SHT_NOTE
+// ELF: 0000: 04000000 08000000 01000000 414D4400
+// ELF: 0010: 01000000 00000000 04000000 1B000000
+// ELF: 0020: 03000000 414D4400 04000700 07000000
+// ELF: 0030: 00000000 00000000 414D4400 414D4447
+// ELF: 0040: 50550000
+
+.hsa_code_object_version 1,0
+// ASM: .hsa_code_object_version 1,0
+
+.hsa_code_object_isa
+// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+
diff --git a/test/MC/ARM/directive-fpu-multiple.s b/test/MC/ARM/directive-fpu-multiple.s
index 66fc274..50389a1 100644
--- a/test/MC/ARM/directive-fpu-multiple.s
+++ b/test/MC/ARM/directive-fpu-multiple.s
@@ -10,7 +10,11 @@
 	.fpu vfp
 	.fpu vfpv2
 	.fpu vfpv3
+	.fpu vfpv3-fp16
 	.fpu vfpv3-d16
+	.fpu vfpv3-d16-fp16
+	.fpu vfpv3xd
+	.fpu vfpv3xd-fp16
 	.fpu vfpv4
 	.fpu vfpv4-d16
 	.fpu fpv4-sp-d16
@@ -18,6 +22,7 @@
 	.fpu fpv5-sp-d16
 	.fpu fp-armv8
 	.fpu neon
+	.fpu neon-fp16
 	.fpu neon-vfpv4
 	.fpu neon-fp-armv8
 	.fpu crypto-neon-fp-armv8
diff --git a/test/MC/ARM/directive-type-diagnostics.s b/test/MC/ARM/directive-type-diagnostics.s
new file mode 100644
index 0000000..b166ffd
--- /dev/null
+++ b/test/MC/ARM/directive-type-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple arm-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple armeb-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumb-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumbeb-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+        .type symbol 32
+// CHECK: error: expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '%<type>' or "<type>"
+// CHECK: .type symbol 32
+// CHECK:              ^
+
diff --git a/test/MC/ARM/thumb_set-diagnostics.s b/test/MC/ARM/thumb_set-diagnostics.s
index 5f1844d..86f1ee5 100644
--- a/test/MC/ARM/thumb_set-diagnostics.s
+++ b/test/MC/ARM/thumb_set-diagnostics.s
@@ -41,3 +41,31 @@
 @ CHECK: 	.thumb_set trailer_trash, 0x11fe1e55,
 @ CHECK:                                            ^
 
+	.type alpha,%function
+alpha:
+	nop
+
+        .type beta,%function
+beta:
+	bkpt
+
+	.thumb_set beta, alpha
+
+@ CHECK: error: redefinition of 'beta'
+@ CHECK: 	.thumb_set beta, alpha
+@ CHECK:                                            ^
+
+	.type recursive_use,%function
+	.thumb_set recursive_use, recursive_use + 1
+
+@ CHECK: error: Recursive use of 'recursive_use'
+@ CHECK: 	.thumb_set recursive_use, recursive_use + 1
+@ CHECK:                                            ^
+
+  variable_result = alpha + 1
+  .long variable_result
+	.thumb_set variable_result, 1
+
+@ CHECK: error: invalid reassignment of non-absolute variable 'variable_result'
+@ CHECK: 	.thumb_set variable_result, 1
+@ CHECK:                                            ^
\ No newline at end of file
diff --git a/test/MC/ARM/thumb_set.s b/test/MC/ARM/thumb_set.s
index d2a0dc0..00b3e53 100644
--- a/test/MC/ARM/thumb_set.s
+++ b/test/MC/ARM/thumb_set.s
@@ -54,8 +54,6 @@ alpha:
 	nop
 
         .type beta,%function
-beta:
-	bkpt
 
 	.thumb_set beta, alpha
 
diff --git a/test/MC/COFF/ARM/directive-type-diagnostics.s b/test/MC/COFF/ARM/directive-type-diagnostics.s
new file mode 100644
index 0000000..f8a52cd
--- /dev/null
+++ b/test/MC/COFF/ARM/directive-type-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple arm-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple armeb-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumb-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumbeb-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+        .type symbol 32
+// CHECK: error: expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '%<type>' or "<type>"
+// CHECK: .type symbol 32
+// CHECK:              ^
+
diff --git a/test/MC/COFF/ARM/lit.local.cfg b/test/MC/COFF/ARM/lit.local.cfg
new file mode 100644
index 0000000..98c6700
--- /dev/null
+++ b/test/MC/COFF/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/Mips/micromips.txt b/test/MC/Disassembler/Mips/micromips.txt
index 637e889..5809ac2 100644
--- a/test/MC/Disassembler/Mips/micromips.txt
+++ b/test/MC/Disassembler/Mips/micromips.txt
@@ -336,3 +336,7 @@
 0x46 0xce # CHECK: sdbbp16 14
 
 0x84 0x34 # CHECK: movep $5, $6, $2, $3
+
+0x00 0x00 0x57 0x7c # CHECK: ei
+
+0x00 0x0a 0x57 0x7c # CHECK: ei $10
diff --git a/test/MC/Disassembler/Mips/micromips32r6.txt b/test/MC/Disassembler/Mips/micromips32r6.txt
index 47c4d08..a2691ee 100644
--- a/test/MC/Disassembler/Mips/micromips32r6.txt
+++ b/test/MC/Disassembler/Mips/micromips32r6.txt
@@ -38,6 +38,12 @@
 
 0x00 0x44 0x0b 0x3c # CHECK: bitswap $4, $2
 
+0x00 0x00 0x00 0x07 # CHECK: break
+
+0x00 0x07 0x00 0x07 # CHECK: break 7
+
+0x00 0x07 0x01 0x47 # CHECK: break 7, 5
+
 0x20 0x25 0x60 0x08 # CHECK: cache 1, 8($5)
 
 0x01 0x65 0x4b 0x3c # CHECK: clo $11, $5
@@ -48,6 +54,12 @@
 
 0x00 0xa4 0x19 0x98 # CHECK: divu $3, $4, $5
 
+0x00 0x00 0x18 0x00 # CHECK: ehb
+
+0x00 0x00 0x57 0x7c # CHECK: ei
+
+0x00 0x0a 0x57 0x7c # CHECK: ei $10
+
 0x00 0x00 0xf3 0x7c # CHECK: eret
 
 0x00 0x01 0xf3 0x7c # CHECK: eretnc
@@ -72,6 +84,8 @@
 
 0x00 0xa4,0x18,0xd8 # CHECK: muhu $3, $4, $5
 
+0x00 0x00 0x00 0x00 # CHECK: nop
+
 0x00 0xa4 0x1a 0xd0 # CHECK: nor $3, $4, $5
 
 0x00,0xa4,0x1a,0x90 # CHECK: or $3, $4, $5
@@ -84,6 +98,8 @@
 
 0x00 0x83 0x11 0x80 # CHECK: selnez $2, $3, $4
 
+0x00 0x83 0x38 0x00 # CHECK: sll $4, $3, 7
+
 0x00 0xa4 0x19 0x90 # CHECK: sub $3, $4, $5
 
 0x00 0xa4 0x19 0xd0 # CHECK: subu $3, $4, $5
diff --git a/test/MC/Disassembler/Mips/micromips_le.txt b/test/MC/Disassembler/Mips/micromips_le.txt
index 3899c51..3058bd0 100644
--- a/test/MC/Disassembler/Mips/micromips_le.txt
+++ b/test/MC/Disassembler/Mips/micromips_le.txt
@@ -336,3 +336,7 @@
 0xce 0x46 # CHECK: sdbbp16 14
 
 0x34 0x84 # CHECK: movep $5, $6, $2, $3
+
+0x00 0x00 0x7c 0x57 # CHECK: ei
+
+0x0a 0x00 0x7c 0x57 # CHECK: ei $10
diff --git a/test/MC/Disassembler/Mips/mips1/valid-mips1.txt b/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
index 1a4f94f..59e702e 100644
--- a/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
+++ b/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
@@ -1,116 +1,110 @@
 # RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips1 | FileCheck %s
 # CHECK: .text
-0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
-0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
 0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
-0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
-0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
-0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
 0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
 0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
 0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
-0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
 0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
-0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
-0x45 0x00 0x00 0x01 # CHECK: bc1f 8
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
 0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
-0x45 0x01 0x00 0x01 # CHECK: bc1t 8
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x04 0x11 0x14 0x9b # CHECK: bal 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x04 0x11 0x14 0x9b # CHECK: bal 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
-0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
-0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
-0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
 0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
 0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
 0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
-0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
 0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
-0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
 0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
-0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
-0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
 0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
-0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
-0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
 0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
-0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
 0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
-0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
-0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
-0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
-0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
-0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
-0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
-0xcf 0x4a 0x81 0xf7 # CHECK: lwc3 $10, -32265($26)
 0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
 0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
-0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
-0x00 0x00 0x98 0x10 # CHECK: mfhi $19
-0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
-0x00 0x00 0x88 0x12 # CHECK: mflo $17
-0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
-0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
-0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
-0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
-0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
-0x02 0x20 0x00 0x11 # CHECK: mthi $17
-0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
-0x03 0x20 0x00 0x13 # CHECK: mtlo $25
-0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
-0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
-0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
-0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
-0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
-0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
-0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
-0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
-0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
-0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
-0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
-0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
 0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
 0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
-0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
-0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
-0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
-0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
-0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
-0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
-0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
-0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x00 0x00 0x40 # CHECK: ssnop
-0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
-0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
-0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
-0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
-0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
-0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
 0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcf 0x4a 0x81 0xf7 # CHECK: lwc3 $10, -32265($26)
 0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
 0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
-0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
-0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
-0x42 0x00 0x00 0x08 # CHECK: tlbp
-0x42 0x00 0x00 0x01 # CHECK: tlbr
-0x42 0x00 0x00 0x02 # CHECK: tlbwi
-0x42 0x00 0x00 0x06 # CHECK: tlbwr
-0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips2.txt b/test/MC/Disassembler/Mips/mips2.txt
deleted file mode 100644
index a604055..0000000
--- a/test/MC/Disassembler/Mips/mips2.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips2 | FileCheck %s
-
-# CHECK: sdc3  $5, 9154($6)
-0xfc 0xc5 0x23 0xc2
-
-# CHECK: swc3  $6, 9158($7)
-0xec 0xe6 0x23 0xc6
-
-# CHECK: ldc3  $7, 9162($8)
-0xdd 0x07 0x23 0xca
-
-# CHECK: lwc3  $8, 9166($9)
-0xcd 0x28 0x23 0xce
diff --git a/test/MC/Disassembler/Mips/mips2/valid-mips2.txt b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
index 3dc5231..268bb29 100644
--- a/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
+++ b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
@@ -1,159 +1,161 @@
 # RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips2 | FileCheck %s
 # CHECK: .text
-0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
-0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0xc0 # CHECK: ehb
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
 0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
 0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
 0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
-0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
-0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
-0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
-0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
-0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
 0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
-0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
 0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
 0x45 0x00 0x00 0x01 # CHECK: bc1f 8
-0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
 0x45 0x01 0x00 0x01 # CHECK: bc1t 8
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
 0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
-0x04 0x11 0x14 0x9b # CHECK: bal 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
-0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
-0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
-0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
-0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
-0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
-0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
-0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
-0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
-0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
-0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
-0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
-0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
 0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
-0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
-0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
 0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
-0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
 0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
-0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
 0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
-0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
-0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
 0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
-0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
-0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
-0x00 0x00 0x00 0xc0 # CHECK: ehb
-0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
-0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
 0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
-0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
-0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
-0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
-0xde 0x3d 0x90 0x1b # CHECK: ldc3 $29, -28645($17)
 0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
 0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
-0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
-0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
 0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
-0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
 0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
 0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcd 0x28 0x23 0xce # CHECK: lwc3  $8, 9166($9)
 0xcf 0x4a 0x81 0xf7 # CHECK: lwc3 $10, -32265($26)
-0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
-0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
-0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
-0x00 0x00 0x98 0x10 # CHECK: mfhi $19
-0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
-0x00 0x00 0x88 0x12 # CHECK: mflo $17
-0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
-0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
-0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
-0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
-0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
-0x02 0x20 0x00 0x11 # CHECK: mthi $17
-0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
-0x03 0x20 0x00 0x13 # CHECK: mtlo $25
-0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
-0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
-0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
-0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
-0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
-0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
-0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
-0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
-0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
-0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
-0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
-0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
-0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
-0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
-0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
+0xdd 0x07 0x23 0xca # CHECK: ldc3  $7, 9162($8)
+0xde 0x3d 0x90 0x1b # CHECK: ldc3 $29, -28645($17)
 0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
-0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
-0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
-0xfd 0x4c 0x16 0xcb # CHECK: sdc3 $12, 5835($10)
-0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
-0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
-0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
-0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
-0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
-0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
-0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
-0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
-0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
-0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
-0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x00 0x00 0x40 # CHECK: ssnop
-0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
-0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
-0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
-0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
-0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
-0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
-0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
 0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
 0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
+0xec 0xe6 0x23 0xc6 # CHECK: swc3  $6, 9158($7)
 0xef 0x4a 0x81 0xf7 # CHECK: swc3 $10, -32265($26)
-0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
-0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
-0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
-0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
-0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
-0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
-0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
-0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
-0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
-0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
-0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
-0x42 0x00 0x00 0x08 # CHECK: tlbp
-0x42 0x00 0x00 0x01 # CHECK: tlbr
-0x42 0x00 0x00 0x02 # CHECK: tlbwi
-0x42 0x00 0x00 0x06 # CHECK: tlbwr
-0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
-0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
-0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
-0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
-0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
-0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
-0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
-0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
-0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
-0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
-0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
-0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
+0xfc 0xc5 0x23 0xc2 # CHECK: sdc3  $5, 9154($6)
+0xfd 0x4c 0x16 0xcb # CHECK: sdc3 $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips3/valid-mips3.txt b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
index 0bec085..2a38b19 100644
--- a/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
+++ b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
@@ -1,211 +1,209 @@
 # RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips3 | FileCheck %s
 # CHECK: .text
-0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
-0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0xc0 # CHECK: ehb
+0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
+0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
+0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
+0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
+0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
+0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
+0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
+0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
+0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
+0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
+0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
 0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
 0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
 0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
-0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
-0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
-0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
-0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
-0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
 0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
-0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
 0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
+0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
+0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
 0x45 0x00 0x00 0x01 # CHECK: bc1f 8
-0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
 0x45 0x01 0x00 0x01 # CHECK: bc1t 8
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
 0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
-0x04 0x11 0x14 0x9b # CHECK: bal 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
-0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
-0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
-0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
-0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
-0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
-0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
-0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
-0xbc 0xa1 0x00 0x08 # CHECK: cache 1, 8($5)
-0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
-0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
-0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
-0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
-0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
+0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
 0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
-0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
 0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
-0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
-0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
 0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
-0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
-0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
-0x46 0x20 0x7e 0x25 # CHECK: cvt.l.d $f24, $f15
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
 0x46 0x00 0xea 0xe5 # CHECK: cvt.l.s $f11, $f29
-0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
+0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
+0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
+0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
 0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
-0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
 0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
-0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
-0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x7e 0x25 # CHECK: cvt.l.d $f24, $f15
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
+0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
+0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
-0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
-0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
-0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x64 0x58 0x46 0x9f # CHECK: daddiu $24, $2, 18079
 0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
-0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
-0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
-0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
-0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
-0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
-0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
-0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
-0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
-0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
-0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
-0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
-0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
-0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
-0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
-0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
-0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
-0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
-0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
-0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
-0x00 0x00 0x00 0xc0 # CHECK: ehb
-0x42 0x00 0x00 0x18 # CHECK: eret
-0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
-0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
-0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
-0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
 0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
-0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
-0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
-0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
 0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
 0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
-0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
-0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
+0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0xa1 0x00 0x08 # CHECK: cache 1, 8($5)
 0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
-0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
 0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
 0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
-0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
-0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
-0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
-0x00 0x00 0x98 0x10 # CHECK: mfhi $19
-0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
-0x00 0x00 0x88 0x12 # CHECK: mflo $17
-0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
-0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
-0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
-0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
-0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
-0x02 0x20 0x00 0x11 # CHECK: mthi $17
-0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
-0x03 0x20 0x00 0x13 # CHECK: mtlo $25
-0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
-0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
-0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
-0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
-0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
-0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
-0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
-0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
-0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
-0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
-0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
-0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
-0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
-0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
-0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
-0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
-0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
 0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
 0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
-0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
-0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
-0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
 0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
 0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
-0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
-0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
-0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
-0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
-0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
-0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
-0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
-0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
-0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
-0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
-0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x00 0x00 0x40 # CHECK: ssnop
-0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
-0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
-0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
-0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
-0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
-0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
-0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
-0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
-0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
-0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
-0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
-0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
-0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
-0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
-0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
-0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
-0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
-0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
-0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
-0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
-0x42 0x00 0x00 0x08 # CHECK: tlbp
-0x42 0x00 0x00 0x01 # CHECK: tlbr
-0x42 0x00 0x00 0x02 # CHECK: tlbwi
-0x42 0x00 0x00 0x06 # CHECK: tlbwr
-0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
-0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
-0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
-0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
-0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
-0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
-0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
-0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
-0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
-0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
-0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
-0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
-0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
-0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
+0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
deleted file mode 100644
index bd4ae4d..0000000
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ /dev/null
@@ -1,451 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
-
-# CHECK: abs.d $f12, $f14
-0x46 0x20 0x73 0x05
-
-# CHECK: abs.s $f6, $f7
-0x46 0x00 0x39 0x85
-
-# CHECK: add $9, $6, $7
-0x00 0xc7 0x48 0x20
-
-# CHECK: add.d $f8, $f12, $f14
-0x46 0x2e 0x62 0x00
-
-# CHECK: add.s $f9, $f6, $f7
-0x46 0x07 0x32 0x40
-
-# CHECK: addi $9, $6, 17767
-0x20 0xc9 0x45 0x67
-
-# CHECK: addiu $9, $6, -15001
-0x24 0xc9 0xc5 0x67
-
-# CHECK: addu $9, $6, $7
-0x00 0xc7 0x48 0x21
-
-# CHECK: and $9, $6, $7
-0x00 0xc7 0x48 0x24
-
-# CHECK: andi $9, $6, 17767
-0x30 0xc9 0x45 0x67
-
-# CHECK: b 1332
-0x10 0x00 0x01 0x4c
-
-# CHECK: bc1f 1332
-0x45 0x00 0x01 0x4c
-
-# CHECK: bc1f $fcc7, 1332
-0x45 0x1c 0x01 0x4c
-
-# CHECK: bc1t 1332
-0x45 0x01 0x01 0x4c
-
-# CHECK: bc1t $fcc7, 1332
-0x45 0x1d 0x01 0x4c
-
-# CHECK: beq $9, $6, 1332
-0x11 0x26 0x01 0x4c
-
-# CHECK: bgez  $6, 1332
-0x04 0xc1 0x01 0x4c
-
-# CHECK: bgezal  $6, 1332
-0x04 0xd1 0x01 0x4c
-
-# CHECK: bgtz  $6, 1332
-0x1c 0xc0 0x01 0x4c
-
-# CHECK: blez  $6, 1332
-0x18 0xc0 0x01 0x4c
-
-# CHECK: bne $9, $6, 1332
-0x15 0x26 0x01 0x4c
-
-# CHECK: c.eq.d $f12, $f14
-0x46 0x2e 0x60 0x32
-
-# CHECK: c.eq.s $f6, $f7
-0x46 0x07 0x30 0x32
-
-# CHECK: c.f.d $f12, $f14
-0x46 0x2e 0x60 0x30
-
-# CHECK: c.f.s $f6, $f7
-0x46 0x07 0x30 0x30
-
-# CHECK: c.le.d $f12, $f14
-0x46 0x2e 0x60 0x3e
-
-# CHECK: c.le.s $f6, $f7
-0x46 0x07 0x30 0x3e
-
-# CHECK: c.lt.d $f12, $f14
-0x46 0x2e 0x60 0x3c
-
-# CHECK: c.lt.s $f6, $f7
-0x46 0x07 0x30 0x3c
-
-# CHECK: c.nge.d $f12, $f14
-0x46 0x2e 0x60 0x3d
-
-# CHECK: c.nge.s $f6, $f7
-0x46 0x07 0x30 0x3d
-
-# CHECK: c.ngl.d $f12, $f14
-0x46 0x2e 0x60 0x3b
-
-# CHECK: c.ngl.s $f6, $f7
-0x46 0x07 0x30 0x3b
-
-# CHECK: c.ngle.d $f12, $f14
-0x46 0x2e 0x60 0x39
-
-# CHECK: c.ngle.s $f6, $f7
-0x46 0x07 0x30 0x39
-
-# CHECK: c.ngt.d $f12, $f14
-0x46 0x2e 0x60 0x3f
-
-# CHECK: c.ngt.s $f6, $f7
-0x46 0x07 0x30 0x3f
-
-# CHECK: c.ole.d $f12, $f14
-0x46 0x2e 0x60 0x36
-
-# CHECK: c.ole.s $f6, $f7
-0x46 0x07 0x30 0x36
-
-# CHECK: c.olt.d $f12, $f14
-0x46 0x2e 0x60 0x34
-
-# CHECK: c.olt.s $f6, $f7
-0x46 0x07 0x30 0x34
-
-# CHECK: c.seq.d $f12, $f14
-0x46 0x2e 0x60 0x3a
-
-# CHECK: c.seq.s $f6, $f7
-0x46 0x07 0x30 0x3a
-
-# CHECK: c.sf.d $f12, $f14
-0x46 0x2e 0x60 0x38
-
-# CHECK: c.sf.s $f6, $f7
-0x46 0x07 0x30 0x38
-
-# CHECK: c.ueq.d $f12, $f14
-0x46 0x2e 0x60 0x33
-
-# CHECK: c.ueq.s $f28, $f18
-0x46 0x12 0xe0 0x33
-
-# CHECK: c.ule.d $f12, $f14
-0x46 0x2e 0x60 0x37
-
-# CHECK: c.ule.s $f6, $f7
-0x46 0x07 0x30 0x37
-
-# CHECK: c.ult.d $f12, $f14
-0x46 0x2e 0x60 0x35
-
-# CHECK: c.ult.s $f6, $f7
-0x46 0x07 0x30 0x35
-
-# CHECK: c.un.d $f12, $f14
-0x46 0x2e 0x60 0x31
-
-# CHECK: c.un.s $f6, $f7
-0x46 0x07 0x30 0x31
-
-# CHECK: ceil.w.d $f12, $f14
-0x46 0x20 0x73 0x0e
-
-# CHECK: ceil.w.s $f6, $f7
-0x46 0x00 0x39 0x8e
-
-# CHECK: cfc1  $6, $7
-0x44 0x46 0x38 0x00
-
-# CHECK: clo  $6, $7
-0x70 0xe6 0x30 0x21
-
-# CHECK: clz  $6, $7
-0x70 0xe6 0x30 0x20
-
-# CHECK: ctc1  $6, $7
-0x44 0xc6 0x38 0x00
-
-# CHECK: cvt.d.s $f6, $f7
-0x46 0x00 0x39 0xa1
-
-# CHECK: cvt.d.w $f12, $f14
-0x46 0x80 0x73 0x21
-
-# CHECK: cvt.s.d $f12, $f14
-0x46 0x20 0x73 0x20
-
-# CHECK: cvt.s.w $f6, $f7
-0x46 0x80 0x39 0xa0
-
-# CHECK: cvt.w.d $f12, $f14
-0x46 0x20 0x73 0x24
-
-# CHECK: cvt.w.s $f6, $f7
-0x46 0x00 0x39 0xa4
-
-# CHECK: floor.w.d $f12, $f14
-0x46 0x20 0x73 0x0f
-
-# CHECK: floor.w.s $f6, $f7
-0x46 0x00 0x39 0x8f
-
-# CHECK: j 1328
-0x08 0x00 0x01 0x4c
-
-# CHECK: jal 1328
-0x0c 0x00 0x01 0x4c
-
-# CHECK: jalx 1328
-0x74 0x00 0x01 0x4c
-
-# CHECK: jalr  $7
-0x00 0xe0 0xf8 0x09
-
-# CHECK: jr  $7
-0x00 0xe0 0x00 0x08
-
-# CHECK: lb  $4, 9158($5)
-0x80 0xa4 0x23 0xc6
-
-# CHECK: lbu $4, 6($5)
-0x90 0xa4 0x00 0x06
-
-# CHECK: ldc1  $f9, 9158($7)
-0xd4 0xe9 0x23 0xc6
-
-# CHECK: lh  $4, 12($5)
-0x84 0xa4 0x00 0x0c
-
-# CHECK: lh  $4, 12($5)
-0x84 0xa4 0x00 0x0c
-
-# CHECK: ll  $9, 9158($7)
-0xc0 0xe9 0x23 0xc6
-
-# CHECK: lui  $6, 17767
-0x3c 0x06 0x45 0x67
-
-# CHECK: lw  $4, 24($5)
-0x8c 0xa4 0x00 0x18
-
-# CHECK: lwc1  $f9, 9158($7)
-0xc4 0xe9 0x23 0xc6
-
-# CHECK: lwl   $2,  3($4)
-0x88 0x82 0x00 0x03
-
-# CHECK: lwr   $3, 16($5)
-0x98 0xa3 0x00 0x10
-
-# CHECK: madd   $6,  $7
-0x70 0xc7 0x00 0x00
-
-# CHECK: maddu  $6,  $7
-0x70 0xc7 0x00 0x01
-
-# CHECK: mfc1   $6, $f7
-0x44 0x06 0x38 0x00
-
-# CHECK: mfhi  $5
-0x00 0x00 0x28 0x10
-
-# CHECK: mflo  $5
-0x00 0x00 0x28 0x12
-
-# CHECK: mov.d $f6, $f8
-0x46 0x20 0x41 0x86
-
-# CHECK: mov.s $f6, $f7
-0x46 0x00 0x39 0x86
-
-# CHECK: movf $3, $2, $fcc7
-0x00,0x5c,0x18,0x01
-
-# CHECK: movf.d $f4, $f2, $fcc7
-0x46,0x3c,0x11,0x11
-
-# CHECK: movf.s $f4, $f2, $fcc7
-0x46,0x1c,0x11,0x11
-
-# CHECK: movt $3, $2, $fcc7
-0x00,0x5d,0x18,0x01
-
-# CHECK: movt.d $f4, $f2, $fcc7
-0x46,0x3d,0x11,0x11
-
-# CHECK: movt.s $f4, $f2, $fcc7
-0x46,0x1d,0x11,0x11
-
-# CHECK: msub   $6,  $7
-0x70 0xc7 0x00 0x04
-
-# CHECK: msubu  $6,  $7
-0x70 0xc7 0x00 0x05
-
-# CHECK: mtc1   $6, $f7
-0x44 0x86 0x38 0x00
-
-# CHECK: mthi   $7
-0x00 0xe0 0x00 0x11
-
-# CHECK: mtlo   $7
-0x00 0xe0 0x00 0x13
-
-# CHECK: mul.d $f8, $f12, $f14
-0x46 0x2e 0x62 0x02
-
-# CHECK: mul.s $f9, $f6, $f7
-0x46 0x07 0x32 0x42
-
-# CHECK: mul $9,  $6,  $7
-0x70 0xc7 0x48 0x02
-
-# CHECK: mult  $3, $5
-0x00 0x65 0x00 0x18
-
-# CHECK: multu $3, $5
-0x00 0x65 0x00 0x19
-
-# CHECK: neg.d $f12, $f14
-0x46 0x20 0x73 0x07
-
-# CHECK: neg.s $f6, $f7
-0x46 0x00 0x39 0x87
-
-# CHECK: nop
-0x00 0x00 0x00 0x00
-
-# CHECK: nor $9,  $6, $7
-0x00 0xc7 0x48 0x27
-
-# CHECK: or  $3, $3, $5
-0x00 0x65 0x18 0x25
-
-# CHECK: ori $9,  $6, 17767
-0x34 0xc9 0x45 0x67
-
-# CHECK: round.w.d $f12, $f14
-0x46 0x20 0x73 0x0c
-
-# CHECK: round.w.s $f6, $f7
-0x46 0x00 0x39 0x8c
-
-# CHECK: sb  $4, 9158($5)
-0xa0 0xa4 0x23 0xc6
-
-# CHECK: sb  $4, 6($5)
-0xa0 0xa4 0x00 0x06
-
-# CHECK: sc  $9, 9158($7)
-0xe0 0xe9 0x23 0xc6
-
-# CHECK: sdc1  $f9, 9158($7)
-0xf4 0xe9 0x23 0xc6
-
-# CHECK: sh  $4, 9158($5)
-0xa4 0xa4 0x23 0xc6
-
-# CHECK: sll $4, $3, 7
-0x00 0x03 0x21 0xc0
-
-# CHECK: sllv  $2, $3, $5
-0x00 0xa3 0x10 0x04
-
-# CHECK: slt $3, $3, $5
-0x00 0x65 0x18 0x2a
-
-# CHECK: slti  $3, $3, 103
-0x28 0x63 0x00 0x67
-
-# CHECK: sltiu $3, $3, 103
-0x2c 0x63 0x00 0x67
-
-# CHECK: sltu  $3, $3, $5
-0x00 0x65 0x18 0x2b
-
-# CHECK: sqrt.d  $f12, $f14
-0x46 0x20 0x73 0x04
-
-# CHECK: sqrt.s  $f6, $f7
-0x46 0x00 0x39 0x84
-
-# CHECK: sra $4, $3, 7
-0x00 0x03 0x21 0xc3
-
-# CHECK: srav  $2, $3, $5
-0x00 0xa3 0x10 0x07
-
-# CHECK: srl $4, $3, 7
-0x00 0x03 0x21 0xc2
-
-# CHECK: srlv  $2, $3, $5
-0x00 0xa3 0x10 0x06
-
-# CHECK: sub.d $f8, $f12, $f14
-0x46 0x2e 0x62 0x01
-
-# CHECK: sub.s $f9, $f6, $f7
-0x46 0x07 0x32 0x41
-
-# CHECK: sub $9,  $6, $7
-0x00 0xc7 0x48 0x22
-
-# CHECK: subu  $4, $3, $5
-0x00 0x65 0x20 0x23
-
-# CHECK: sw  $4, 24($5)
-0xac 0xa4 0x00 0x18
-
-# CHECK: swc1  $f9, 9158($7)
-0xe4 0xe9 0x23 0xc6
-
-# CHECK: swl $4,  16($5)
-0xa8 0xa4 0x00 0x10
-
-# CHECK: swr $6, 16($7)
-0xb8 0xe6 0x00 0x10
-
-# CHECK: sync  7
-0x00 0x00 0x01 0xcf
-
-# CHECK: trunc.w.d $f12, $f14
-0x46 0x20 0x73 0x0d
-
-# CHECK: trunc.w.s $f6, $f7
-0x46 0x00 0x39 0x8d
-
-# CHECK: xor $3, $3, $5
-0x00 0x65 0x18 0x26
-
-# CHECK: xori  $9,  $6, 17767
-0x38 0xc9 0x45 0x67
-
-# CHECK: .set    push
-# CHECK: .set    mips32r2
-# CHECK: rdhwr   $5, $29
-# CHECK: .set    pop
-0x7c 0x05 0xe8 0x3b
-
-# CHECK: cache 1, 2($3)
-0xbc 0x61 0x00 0x02
-
-# CHECK: pref 3, 4($2)
-0xcc 0x43 0x00 0x04
-
-# CHECK: swc2  $9, 9158($7)
-0xe8 0xe9 0x23 0xc6
-
-# CHECK: lwc2  $8, 9162($6)
-0xc8 0xc8 0x23 0xca
diff --git a/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt b/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
index ea209d1..f229973 100644
--- a/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
+++ b/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
@@ -86,6 +86,7 @@
 0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
 0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
 0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
 0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
 0x10 0x28 0x00 0x00 # CHECK: mfhi $5
 0x12 0x28 0x00 0x00 # CHECK: mflo $5
@@ -93,6 +94,7 @@
 0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
 0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
 0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
 0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
 0x11 0x00 0xe0 0x00 # CHECK: mthi $7
 0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
diff --git a/test/MC/Disassembler/Mips/mips32/valid-mips32.txt b/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
index 45b672b..09f1e56 100644
--- a/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
@@ -1,149 +1,158 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
 0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
+0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
+0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
+0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
 0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x7c 0x05 0xe8 0x3b # CHECK: .set push
+                    # CHECK: .set mips32r2
+                    # CHECK: rdhwr $5, $29
+                    # CHECK: .set pop
 0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
 0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
 0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
-0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
-0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
-0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
-0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
-0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
 0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
-0x7c 0x05 0xe8 0x3b # CHECK: .set push
-                    # CHECK: .set mips32r2
-                    # CHECK: rdhwr $5, $29
-                    # CHECK: .set pop
 0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
 0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
-0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
index 533fc69..c019c41 100644
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ b/test/MC/Disassembler/Mips/mips32_le.txt
@@ -254,6 +254,9 @@
 # CHECK: maddu  $6,  $7
 0x01 0x00 0xc7 0x70
 
+# CHECK: mfc0 $8, $16, 4
+0x04 0x80 0x08 0x40
+
 # CHECK: mfc1   $6, $f7
 0x00 0x38 0x06 0x44
 
@@ -299,6 +302,9 @@
 # CHECK: msubu  $6,  $7
 0x05 0x00 0xc7 0x70
 
+# CHECK: mtc0 $9, $15, 1
+0x01 0x78 0x89 0x40
+
 # CHECK: mtc1   $6, $f7
 0x00 0x38 0x86 0x44
 
diff --git a/test/MC/Disassembler/Mips/mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2.txt
deleted file mode 100644
index 354ef74..0000000
--- a/test/MC/Disassembler/Mips/mips32r2.txt
+++ /dev/null
@@ -1,453 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 | FileCheck %s
-# CHECK: abs.d $f12, $f14
-0x46 0x20 0x73 0x05
-
-# CHECK: abs.s $f6, $f7
-0x46 0x00 0x39 0x85
-
-# CHECK: add $9, $6, $7
-0x00 0xc7 0x48 0x20
-
-# CHECK: add.d $f8, $f12, $f14
-0x46 0x2e 0x62 0x00
-
-# CHECK: add.s $f9, $f6, $f7
-0x46 0x07 0x32 0x40
-
-# CHECK: addi $9, $6, 17767
-0x20 0xc9 0x45 0x67
-
-# CHECK: addiu $9, $6, -15001
-0x24 0xc9 0xc5 0x67
-
-# CHECK: addu $9, $6, $7
-0x00 0xc7 0x48 0x21
-
-# CHECK: and $9, $6, $7
-0x00 0xc7 0x48 0x24
-
-# CHECK: andi $9, $6, 17767
-0x30 0xc9 0x45 0x67
-
-# CHECK: b 1332
-0x10 0x00 0x01 0x4c
-
-# CHECK: bc1f 1332
-0x45 0x00 0x01 0x4c
-
-# CHECK: bc1f $fcc7, 1332
-0x45 0x1c 0x01 0x4c
-
-# CHECK: bc1t 1332
-0x45 0x01 0x01 0x4c
-
-# CHECK: bc1t $fcc7, 1332
-0x45 0x1d 0x01 0x4c
-
-# CHECK: beq $9, $6, 1332
-0x11 0x26 0x01 0x4c
-
-# CHECK: bgez  $6, 1332
-0x04 0xc1 0x01 0x4c
-
-# CHECK: bgezal  $6, 1332
-0x04 0xd1 0x01 0x4c
-
-# CHECK: bgtz  $6, 1332
-0x1c 0xc0 0x01 0x4c
-
-# CHECK: blez  $6, 1332
-0x18 0xc0 0x01 0x4c
-
-# CHECK: bne $9, $6, 1332
-0x15 0x26 0x01 0x4c
-
-# CHECK: c.eq.d $f12, $f14
-0x46 0x2e 0x60 0x32
-
-# CHECK: c.eq.s $f6, $f7
-0x46 0x07 0x30 0x32
-
-# CHECK: c.f.d $f12, $f14
-0x46 0x2e 0x60 0x30
-
-# CHECK: c.f.s $f6, $f7
-0x46 0x07 0x30 0x30
-
-# CHECK: c.le.d $f12, $f14
-0x46 0x2e 0x60 0x3e
-
-# CHECK: c.le.s $f6, $f7
-0x46 0x07 0x30 0x3e
-
-# CHECK: c.lt.d $f12, $f14
-0x46 0x2e 0x60 0x3c
-
-# CHECK: c.lt.s $f6, $f7
-0x46 0x07 0x30 0x3c
-
-# CHECK: c.nge.d $f12, $f14
-0x46 0x2e 0x60 0x3d
-
-# CHECK: c.nge.s $f6, $f7
-0x46 0x07 0x30 0x3d
-
-# CHECK: c.ngl.d $f12, $f14
-0x46 0x2e 0x60 0x3b
-
-# CHECK: c.ngl.s $f6, $f7
-0x46 0x07 0x30 0x3b
-
-# CHECK: c.ngle.d $f12, $f14
-0x46 0x2e 0x60 0x39
-
-# CHECK: c.ngle.s $f6, $f7
-0x46 0x07 0x30 0x39
-
-# CHECK: c.ngt.d $f12, $f14
-0x46 0x2e 0x60 0x3f
-
-# CHECK: c.ngt.s $f6, $f7
-0x46 0x07 0x30 0x3f
-
-# CHECK: c.ole.d $f12, $f14
-0x46 0x2e 0x60 0x36
-
-# CHECK: c.ole.s $f6, $f7
-0x46 0x07 0x30 0x36
-
-# CHECK: c.olt.d $f12, $f14
-0x46 0x2e 0x60 0x34
-
-# CHECK: c.olt.s $f6, $f7
-0x46 0x07 0x30 0x34
-
-# CHECK: c.seq.d $f12, $f14
-0x46 0x2e 0x60 0x3a
-
-# CHECK: c.seq.s $f6, $f7
-0x46 0x07 0x30 0x3a
-
-# CHECK: c.sf.d $f12, $f14
-0x46 0x2e 0x60 0x38
-
-# CHECK: c.sf.s $f6, $f7
-0x46 0x07 0x30 0x38
-
-# CHECK: c.ueq.d $f12, $f14
-0x46 0x2e 0x60 0x33
-
-# CHECK: c.ueq.s $f28, $f18
-0x46 0x12 0xe0 0x33
-
-# CHECK: c.ule.d $f12, $f14
-0x46 0x2e 0x60 0x37
-
-# CHECK: c.ule.s $f6, $f7
-0x46 0x07 0x30 0x37
-
-# CHECK: c.ult.d $f12, $f14
-0x46 0x2e 0x60 0x35
-
-# CHECK: c.ult.s $f6, $f7
-0x46 0x07 0x30 0x35
-
-# CHECK: c.un.d $f12, $f14
-0x46 0x2e 0x60 0x31
-
-# CHECK: c.un.s $f6, $f7
-0x46 0x07 0x30 0x31
-
-# CHECK: ceil.w.d $f12, $f14
-0x46 0x20 0x73 0x0e
-
-# CHECK: ceil.w.s $f6, $f7
-0x46 0x00 0x39 0x8e
-
-# CHECK: cfc1  $6, $7
-0x44 0x46 0x38 0x00
-
-# CHECK: clo  $6, $7
-0x70 0xe6 0x30 0x21
-
-# CHECK: clz  $6, $7
-0x70 0xe6 0x30 0x20
-
-# CHECK: ctc1  $6, $7
-0x44 0xc6 0x38 0x00
-
-# CHECK: cvt.d.s $f6, $f7
-0x46 0x00 0x39 0xa1
-
-# CHECK: cvt.d.w $f12, $f14
-0x46 0x80 0x73 0x21
-
-# CHECK: cvt.l.d $f12, $f14
-0x46 0x20 0x73 0x25
-
-# CHECK: cvt.l.s $f6, $f7
-0x46 0x00 0x39 0xa5
-
-# CHECK: cvt.s.d $f12, $f14
-0x46 0x20 0x73 0x20
-
-# CHECK: cvt.s.w $f6, $f7
-0x46 0x80 0x39 0xa0
-
-# CHECK: cvt.w.d $f12, $f14
-0x46 0x20 0x73 0x24
-
-# CHECK: cvt.w.s $f6, $f7
-0x46 0x00 0x39 0xa4
-
-# CHECK: floor.w.d $f12, $f14
-0x46 0x20 0x73 0x0f
-
-# CHECK: floor.w.s $f6, $f7
-0x46 0x00 0x39 0x8f
-
-# CHECK: ins $19, $9, 6, 7
-0x7d 0x33 0x61 0x84
-
-# CHECK: j 1328
-0x08 0x00 0x01 0x4c
-
-# CHECK: jal 1328
-0x0c 0x00 0x01 0x4c
-
-# CHECK: jalx 1328
-0x74 0x00 0x01 0x4c
-
-# CHECK: jalr  $7
-0x00 0xe0 0xf8 0x09
-
-# CHECK: jr  $7
-0x00 0xe0 0x00 0x08
-
-# CHECK: lb  $4, 9158($5)
-0x80 0xa4 0x23 0xc6
-
-# CHECK: lbu $4, 6($5)
-0x90 0xa4 0x00 0x06
-
-# CHECK: ldc1  $f9, 9158($7)
-0xd4 0xe9 0x23 0xc6
-
-# CHECK: lh  $4, 12($5)
-0x84 0xa4 0x00 0x0c
-
-# CHECK: lh  $4, 12($5)
-0x84 0xa4 0x00 0x0c
-
-# CHECK: ll  $9, 9158($7)
-0xc0 0xe9 0x23 0xc6
-
-# CHECK: lui  $6, 17767
-0x3c 0x06 0x45 0x67
-
-# CHECK: luxc1 $f0, $6($5)
-0x4c 0xa6 0x00 0x05
-
-# CHECK: lw  $4, 24($5)
-0x8c 0xa4 0x00 0x18
-
-# CHECK: lwc1  $f9, 9158($7)
-0xc4 0xe9 0x23 0xc6
-
-# CHECK: lwl   $2,  3($4)
-0x88 0x82 0x00 0x03
-
-# CHECK: lwr   $3, 16($5)
-0x98 0xa3 0x00 0x10
-
-# CHECK: lwxc1 $f20, $12($14)
-0x4d 0xcc 0x05 0x00
-
-# CHECK: madd   $6,  $7
-0x70 0xc7 0x00 0x00
-
-# CHECK: maddu  $6,  $7
-0x70 0xc7 0x00 0x01
-
-# CHECK: mfc1   $6, $f7
-0x44 0x06 0x38 0x00
-
-# CHECK: mfhi  $5
-0x00 0x00 0x28 0x10
-
-# CHECK: mflo  $5
-0x00 0x00 0x28 0x12
-
-# CHECK: mov.d $f6, $f8
-0x46 0x20 0x41 0x86
-
-# CHECK: mov.s $f6, $f7
-0x46 0x00 0x39 0x86
-
-# CHECK: msub   $6,  $7
-0x70 0xc7 0x00 0x04
-
-# CHECK: msubu  $6,  $7
-0x70 0xc7 0x00 0x05
-
-# CHECK: mtc1   $6, $f7
-0x44 0x86 0x38 0x00
-
-# CHECK: mthi   $7
-0x00 0xe0 0x00 0x11
-
-# CHECK: mtlo   $7
-0x00 0xe0 0x00 0x13
-
-# CHECK: mul.d $f8, $f12, $f14
-0x46 0x2e 0x62 0x02
-
-# CHECK: mul.s $f9, $f6, $f7
-0x46 0x07 0x32 0x42
-
-# CHECK: mul $9,  $6,  $7
-0x70 0xc7 0x48 0x02
-
-# CHECK: mult  $3, $5
-0x00 0x65 0x00 0x18
-
-# CHECK: multu $3, $5
-0x00 0x65 0x00 0x19
-
-# CHECK: neg.d $f12, $f14
-0x46 0x20 0x73 0x07
-
-# CHECK: neg.s $f6, $f7
-0x46 0x00 0x39 0x87
-
-# CHECK: nop
-0x00 0x00 0x00 0x00
-
-# CHECK: nor $9,  $6, $7
-0x00 0xc7 0x48 0x27
-
-# CHECK: or  $3, $3, $5
-0x00 0x65 0x18 0x25
-
-# CHECK: ori $9,  $6, 17767
-0x34 0xc9 0x45 0x67
-
-# CHECK: rotr $9, $6, 7
-0x00 0x26 0x49 0xc2
-
-# CHECK:  rotrv $9, $6, $7
-0x00 0xe6 0x48 0x46
-
-# CHECK: round.w.d $f12, $f14
-0x46 0x20 0x73 0x0c
-
-# CHECK: round.w.s $f6, $f7
-0x46 0x00 0x39 0x8c
-
-# CHECK: sb  $4, 9158($5)
-0xa0 0xa4 0x23 0xc6
-
-# CHECK: sb  $4, 6($5)
-0xa0 0xa4 0x00 0x06
-
-# CHECK: sc  $9, 9158($7)
-0xe0 0xe9 0x23 0xc6
-
-# CHECK: sdc1  $f9, 9158($7)
-0xf4 0xe9 0x23 0xc6
-
-# CHECK: seb $6, $7
-0x7c 0x07 0x34 0x20
-
-# CHECK: seh $6, $7
-0x7c 0x07 0x36 0x20
-
-# CHECK: sh  $4, 9158($5)
-0xa4 0xa4 0x23 0xc6
-
-# CHECK: sll $4, $3, 7
-0x00 0x03 0x21 0xc0
-
-# CHECK: sllv  $2, $3, $5
-0x00 0xa3 0x10 0x04
-
-# CHECK: slt $3, $3, $5
-0x00 0x65 0x18 0x2a
-
-# CHECK: slti  $3, $3, 103
-0x28 0x63 0x00 0x67
-
-# CHECK: sltiu $3, $3, 103
-0x2c 0x63 0x00 0x67
-
-# CHECK: sltu  $3, $3, $5
-0x00 0x65 0x18 0x2b
-
-# CHECK: sqrt.d  $f12, $f14
-0x46 0x20 0x73 0x04
-
-# CHECK: sqrt.s  $f6, $f7
-0x46 0x00 0x39 0x84
-
-# CHECK: sra $4, $3, 7
-0x00 0x03 0x21 0xc3
-
-# CHECK: srav  $2, $3, $5
-0x00 0xa3 0x10 0x07
-
-# CHECK: srl $4, $3, 7
-0x00 0x03 0x21 0xc2
-
-# CHECK: srlv  $2, $3, $5
-0x00 0xa3 0x10 0x06
-
-# CHECK: sub.d $f8, $f12, $f14
-0x46 0x2e 0x62 0x01
-
-# CHECK: sub.s $f9, $f6, $f7
-0x46 0x07 0x32 0x41
-
-# CHECK: sub $9,  $6, $7
-0x00 0xc7 0x48 0x22
-
-# CHECK: subu  $4, $3, $5
-0x00 0x65 0x20 0x23
-
-# CHECK: suxc1 $f4, $24($5)
-0x4c 0xb8 0x20 0x0d
-
-# CHECK: sw  $4, 24($5)
-0xac 0xa4 0x00 0x18
-
-# CHECK: swc1  $f9, 9158($7)
-0xe4 0xe9 0x23 0xc6
-
-# CHECK: swl $4,  16($5)
-0xa8 0xa4 0x00 0x10
-
-# CHECK: swr $6, 16($7)
-0xb8 0xe6 0x00 0x10
-
-# CHECK: swxc1 $f26, $18($22)
-0x4e 0xd2 0xd0 0x08
-
-# CHECK: sync  7
-0x00 0x00 0x01 0xcf
-
-# CHECK: trunc.w.d $f12, $f14
-0x46 0x20 0x73 0x0d
-
-# CHECK: trunc.w.s $f6, $f7
-0x46 0x00 0x39 0x8d
-
-# CHECK: wsbh  $6, $7
-0x7c 0x07 0x30 0xa0
-
-# CHECK: xor $3, $3, $5
-0x00 0x65 0x18 0x26
-
-# CHECK: xori  $9,  $6, 17767
-0x38 0xc9 0x45 0x67
-
-# CHECK: synci -6137($fp)
-0x07 0xdf 0xe8 0x07
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt
new file mode 100644
index 0000000..c487b6d
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt
@@ -0,0 +1,174 @@
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | FileCheck %s
+# Try a mips64* triple to confirm that mips* vs mips64* triples no longer have
+# an effect on the disassembler behaviour.
+# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mcpu=mips32r2 | FileCheck %s
+0x05 0x73 0x20 0x46 # CHECK: abs.d $f12, $f14
+0x85 0x39 0x00 0x46 # CHECK: abs.s $f6, $f7
+0x20 0x48 0xc7 0x00 # CHECK: add $9, $6, $7
+0x00 0x62 0x2e 0x46 # CHECK: add.d $f8, $f12, $f14
+0x40 0x32 0x07 0x46 # CHECK: add.s $f9, $f6, $f7
+0x67 0x45 0xc9 0x20 # CHECK: addi $9, $6, 17767
+0x67 0xc5 0xc9 0x24 # CHECK: addiu $9, $6, -15001
+0x21 0x48 0xc7 0x00 # CHECK: addu $9, $6, $7
+0x24 0x48 0xc7 0x00 # CHECK: and $9, $6, $7
+0x67 0x45 0xc9 0x30 # CHECK: andi $9, $6, 17767
+0x4c 0x01 0x00 0x10 # CHECK: b 1332
+0x4c 0x01 0x00 0x45 # CHECK: bc1f 1332
+0x4c 0x01 0x1c 0x45 # CHECK: bc1f $fcc7, 1332
+0x4c 0x01 0x01 0x45 # CHECK: bc1t 1332
+0x4c 0x01 0x1d 0x45 # CHECK: bc1t $fcc7, 1332
+0x4c 0x01 0x26 0x11 # CHECK: beq $9, $6, 1332
+0x4c 0x01 0xc1 0x04 # CHECK: bgez $6, 1332
+0x4c 0x01 0xd1 0x04 # CHECK: bgezal $6, 1332
+0x4c 0x01 0xc0 0x1c # CHECK: bgtz $6, 1332
+0x4c 0x01 0xc0 0x18 # CHECK: blez $6, 1332
+0x4c 0x01 0x26 0x15 # CHECK: bne $9, $6, 1332
+0x32 0x60 0x2e 0x46 # CHECK: c.eq.d $f12, $f14
+0x32 0x30 0x07 0x46 # CHECK: c.eq.s $f6, $f7
+0x30 0x60 0x2e 0x46 # CHECK: c.f.d $f12, $f14
+0x30 0x30 0x07 0x46 # CHECK: c.f.s $f6, $f7
+0x3e 0x60 0x2e 0x46 # CHECK: c.le.d $f12, $f14
+0x3e 0x30 0x07 0x46 # CHECK: c.le.s $f6, $f7
+0x3c 0x60 0x2e 0x46 # CHECK: c.lt.d $f12, $f14
+0x3c 0x30 0x07 0x46 # CHECK: c.lt.s $f6, $f7
+0x3d 0x60 0x2e 0x46 # CHECK: c.nge.d $f12, $f14
+0x3d 0x30 0x07 0x46 # CHECK: c.nge.s $f6, $f7
+0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.d $f12, $f14
+0x3b 0x30 0x07 0x46 # CHECK: c.ngl.s $f6, $f7
+0x39 0x60 0x2e 0x46 # CHECK: c.ngle.d $f12, $f14
+0x39 0x30 0x07 0x46 # CHECK: c.ngle.s $f6, $f7
+0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.d $f12, $f14
+0x3f 0x30 0x07 0x46 # CHECK: c.ngt.s $f6, $f7
+0x36 0x60 0x2e 0x46 # CHECK: c.ole.d $f12, $f14
+0x36 0x30 0x07 0x46 # CHECK: c.ole.s $f6, $f7
+0x34 0x60 0x2e 0x46 # CHECK: c.olt.d $f12, $f14
+0x34 0x30 0x07 0x46 # CHECK: c.olt.s $f6, $f7
+0x3a 0x60 0x2e 0x46 # CHECK: c.seq.d $f12, $f14
+0x3a 0x30 0x07 0x46 # CHECK: c.seq.s $f6, $f7
+0x38 0x60 0x2e 0x46 # CHECK: c.sf.d $f12, $f14
+0x38 0x30 0x07 0x46 # CHECK: c.sf.s $f6, $f7
+0x33 0x60 0x2e 0x46 # CHECK: c.ueq.d $f12, $f14
+0x33 0xe0 0x12 0x46 # CHECK: c.ueq.s $f28, $f18
+0x37 0x60 0x2e 0x46 # CHECK: c.ule.d $f12, $f14
+0x37 0x30 0x07 0x46 # CHECK: c.ule.s $f6, $f7
+0x35 0x60 0x2e 0x46 # CHECK: c.ult.d $f12, $f14
+0x35 0x30 0x07 0x46 # CHECK: c.ult.s $f6, $f7
+0x31 0x60 0x2e 0x46 # CHECK: c.un.d $f12, $f14
+0x31 0x30 0x07 0x46 # CHECK: c.un.s $f6, $f7
+0x0e 0x73 0x20 0x46 # CHECK: ceil.w.d $f12, $f14
+0x8e 0x39 0x00 0x46 # CHECK: ceil.w.s $f6, $f7
+0x00 0x38 0x46 0x44 # CHECK: cfc1 $6, $7
+0x21 0x30 0xe6 0x70 # CHECK: clo $6, $7
+0x20 0x30 0xe6 0x70 # CHECK: clz $6, $7
+0x00 0x38 0xc6 0x44 # CHECK: ctc1 $6, $7
+0xa1 0x39 0x00 0x46 # CHECK: cvt.d.s $f6, $f7
+0x21 0x73 0x80 0x46 # CHECK: cvt.d.w $f12, $f14
+0x25 0x73 0x20 0x46 # CHECK: cvt.l.d $f12, $f14
+0xa5 0x39 0x00 0x46 # CHECK: cvt.l.s $f6, $f7
+0x20 0x73 0x20 0x46 # CHECK: cvt.s.d $f12, $f14
+0xa0 0x39 0x80 0x46 # CHECK: cvt.s.w $f6, $f7
+0x24 0x73 0x20 0x46 # CHECK: cvt.w.d $f12, $f14
+0xa4 0x39 0x00 0x46 # CHECK: cvt.w.s $f6, $f7
+0x00 0x60 0x7e 0x41 # CHECK: di $fp
+0x00 0x60 0x60 0x41 # CHECK: di
+0x20 0x60 0x6e 0x41 # CHECK: ei $14
+0x20 0x60 0x60 0x41 # CHECK: ei
+0x0f 0x73 0x20 0x46 # CHECK: floor.w.d $f12, $f14
+0x8f 0x39 0x00 0x46 # CHECK: floor.w.s $f6, $f7
+0x84 0x61 0x33 0x7d # CHECK: ins $19, $9, 6, 7
+0x4c 0x01 0x00 0x08 # CHECK: j 1328
+0x4c 0x01 0x00 0x0c # CHECK: jal 1328
+0x4c 0x01 0x00 0x74 # CHECK: jalx 1328
+0x09 0xf8 0xe0 0x00 # CHECK: jalr $7
+0x09 0xfc 0x80 0x00 # CHECK: jalr.hb $4
+0x09 0x24 0xa0 0x00 # CHECK: jalr.hb $4, $5
+0x08 0x00 0xe0 0x00 # CHECK: jr $7
+0xc6 0x23 0xa4 0x80 # CHECK: lb $4, 9158($5)
+0x06 0x00 0xa4 0x90 # CHECK: lbu $4, 6($5)
+0xc6 0x23 0xe9 0xd4 # CHECK: ldc1 $f9, 9158($7)
+0x01 0x02 0xf7 0x4d # CHECK: ldxc1 $f8, $23($15)
+0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
+0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
+0xc6 0x23 0xe9 0xc0 # CHECK: ll $9, 9158($7)
+0x67 0x45 0x06 0x3c # CHECK: lui $6, 17767
+0x05 0x00 0xa6 0x4c # CHECK: luxc1 $f0, $6($5)
+0x18 0x00 0xa4 0x8c # CHECK: lw $4, 24($5)
+0xc6 0x23 0xe9 0xc4 # CHECK: lwc1 $f9, 9158($7)
+0x03 0x00 0x82 0x88 # CHECK: lwl $2, 3($4)
+0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
+0x00 0x05 0xcc 0x4d # CHECK: lwxc1 $f20, $12($14)
+0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
+0xa1 0xd4 0x94 0x4e # CHECK: madd.d $f18, $f20, $f26, $f20
+0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
+0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
+0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
+0x10 0x28 0x00 0x00 # CHECK: mfhi $5
+0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
+0x12 0x28 0x00 0x00 # CHECK: mflo $5
+0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
+0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
+0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
+0xa9 0xf2 0x52 0x4c # CHECK: msub.d $f10, $f2, $f30, $f18
+0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
+0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
+0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
+0x11 0x00 0xe0 0x00 # CHECK: mthi $7
+0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
+0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
+0x02 0x62 0x2e 0x46 # CHECK: mul.d $f8, $f12, $f14
+0x42 0x32 0x07 0x46 # CHECK: mul.s $f9, $f6, $f7
+0x02 0x48 0xc7 0x70 # CHECK: mul $9, $6, $7
+0x18 0x00 0x65 0x00 # CHECK: mult $3, $5
+0x19 0x00 0x65 0x00 # CHECK: multu $3, $5
+0x07 0x73 0x20 0x46 # CHECK: neg.d $f12, $f14
+0x87 0x39 0x00 0x46 # CHECK: neg.s $f6, $f7
+0xb1 0x74 0x54 0x4d # CHECK: nmadd.d $f18, $f10, $f14, $f20
+0x30 0xc8 0xac 0x4c # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x27 0x48 0xc7 0x00 # CHECK: nor $9, $6, $7
+0xb9 0x87 0x1e 0x4d # CHECK: nmsub.d $f30, $f8, $f16, $f30
+0x78 0x98 0x04 0x4f # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x25 0x18 0x65 0x00 # CHECK: or $3, $3, $5
+0x67 0x45 0xc9 0x34 # CHECK: ori $9, $6, 17767
+0xc2 0x49 0x26 0x00 # CHECK: rotr $9, $6, 7
+0x46 0x48 0xe6 0x00 # CHECK: rotrv $9, $6, $7
+0x0c 0x73 0x20 0x46 # CHECK: round.w.d $f12, $f14
+0x8c 0x39 0x00 0x46 # CHECK: round.w.s $f6, $f7
+0xc6 0x23 0xa4 0xa0 # CHECK: sb $4, 9158($5)
+0x06 0x00 0xa4 0xa0 # CHECK: sb $4, 6($5)
+0xc6 0x23 0xe9 0xe0 # CHECK: sc $9, 9158($7)
+0xc6 0x23 0xe9 0xf4 # CHECK: sdc1 $f9, 9158($7)
+0x09 0x40 0x24 0x4f # CHECK: sdxc1 $f8, $4($25)
+0x20 0x34 0x07 0x7c # CHECK: seb $6, $7
+0x20 0x36 0x07 0x7c # CHECK: seh $6, $7
+0xc6 0x23 0xa4 0xa4 # CHECK: sh $4, 9158($5)
+0xc0 0x21 0x03 0x00 # CHECK: sll $4, $3, 7
+0x04 0x10 0xa3 0x00 # CHECK: sllv $2, $3, $5
+0x2a 0x18 0x65 0x00 # CHECK: slt $3, $3, $5
+0x67 0x00 0x63 0x28 # CHECK: slti $3, $3, 103
+0x67 0x00 0x63 0x2c # CHECK: sltiu $3, $3, 103
+0x2b 0x18 0x65 0x00 # CHECK: sltu $3, $3, $5
+0x04 0x73 0x20 0x46 # CHECK: sqrt.d $f12, $f14
+0x84 0x39 0x00 0x46 # CHECK: sqrt.s $f6, $f7
+0xc3 0x21 0x03 0x00 # CHECK: sra $4, $3, 7
+0x07 0x10 0xa3 0x00 # CHECK: srav $2, $3, $5
+0xc2 0x21 0x03 0x00 # CHECK: srl $4, $3, 7
+0x06 0x10 0xa3 0x00 # CHECK: srlv $2, $3, $5
+0x01 0x62 0x2e 0x46 # CHECK: sub.d $f8, $f12, $f14
+0x41 0x32 0x07 0x46 # CHECK: sub.s $f9, $f6, $f7
+0x22 0x48 0xc7 0x00 # CHECK: sub $9, $6, $7
+0x23 0x20 0x65 0x00 # CHECK: subu $4, $3, $5
+0x0d 0x20 0xb8 0x4c # CHECK: suxc1 $f4, $24($5)
+0x18 0x00 0xa4 0xac # CHECK: sw $4, 24($5)
+0xc6 0x23 0xe9 0xe4 # CHECK: swc1 $f9, 9158($7)
+0x10 0x00 0xa4 0xa8 # CHECK: swl $4, 16($5)
+0x10 0x00 0xe6 0xb8 # CHECK: swr $6, 16($7)
+0x08 0xd0 0xd2 0x4e # CHECK: swxc1 $f26, $18($22)
+0xcf 0x01 0x00 0x00 # CHECK: sync 7
+0x0d 0x73 0x20 0x46 # CHECK: trunc.w.d $f12, $f14
+0x8d 0x39 0x00 0x46 # CHECK: trunc.w.s $f6, $f7
+0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
+0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
+0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt
deleted file mode 100644
index d0eb13c..0000000
--- a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | FileCheck %s
-# Try a mips64* triple to confirm that mips* vs mips64* triples no longer have
-# an effect on the disassembler behaviour.
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mcpu=mips32r2 | FileCheck %s
-0x05 0x73 0x20 0x46 # CHECK: abs.d $f12, $f14
-0x85 0x39 0x00 0x46 # CHECK: abs.s $f6, $f7
-0x20 0x48 0xc7 0x00 # CHECK: add $9, $6, $7
-0x00 0x62 0x2e 0x46 # CHECK: add.d $f8, $f12, $f14
-0x40 0x32 0x07 0x46 # CHECK: add.s $f9, $f6, $f7
-0x67 0x45 0xc9 0x20 # CHECK: addi $9, $6, 17767
-0x67 0xc5 0xc9 0x24 # CHECK: addiu $9, $6, -15001
-0x21 0x48 0xc7 0x00 # CHECK: addu $9, $6, $7
-0x24 0x48 0xc7 0x00 # CHECK: and $9, $6, $7
-0x67 0x45 0xc9 0x30 # CHECK: andi $9, $6, 17767
-0x4c 0x01 0x00 0x10 # CHECK: b 1332
-0x4c 0x01 0x00 0x45 # CHECK: bc1f 1332
-0x4c 0x01 0x1c 0x45 # CHECK: bc1f $fcc7, 1332
-0x4c 0x01 0x01 0x45 # CHECK: bc1t 1332
-0x4c 0x01 0x1d 0x45 # CHECK: bc1t $fcc7, 1332
-0x4c 0x01 0x26 0x11 # CHECK: beq $9, $6, 1332
-0x4c 0x01 0xc1 0x04 # CHECK: bgez $6, 1332
-0x4c 0x01 0xd1 0x04 # CHECK: bgezal $6, 1332
-0x4c 0x01 0xc0 0x1c # CHECK: bgtz $6, 1332
-0x4c 0x01 0xc0 0x18 # CHECK: blez $6, 1332
-0x4c 0x01 0x26 0x15 # CHECK: bne $9, $6, 1332
-0x32 0x60 0x2e 0x46 # CHECK: c.eq.d $f12, $f14
-0x32 0x30 0x07 0x46 # CHECK: c.eq.s $f6, $f7
-0x30 0x60 0x2e 0x46 # CHECK: c.f.d $f12, $f14
-0x30 0x30 0x07 0x46 # CHECK: c.f.s $f6, $f7
-0x3e 0x60 0x2e 0x46 # CHECK: c.le.d $f12, $f14
-0x3e 0x30 0x07 0x46 # CHECK: c.le.s $f6, $f7
-0x3c 0x60 0x2e 0x46 # CHECK: c.lt.d $f12, $f14
-0x3c 0x30 0x07 0x46 # CHECK: c.lt.s $f6, $f7
-0x3d 0x60 0x2e 0x46 # CHECK: c.nge.d $f12, $f14
-0x3d 0x30 0x07 0x46 # CHECK: c.nge.s $f6, $f7
-0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.d $f12, $f14
-0x3b 0x30 0x07 0x46 # CHECK: c.ngl.s $f6, $f7
-0x39 0x60 0x2e 0x46 # CHECK: c.ngle.d $f12, $f14
-0x39 0x30 0x07 0x46 # CHECK: c.ngle.s $f6, $f7
-0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.d $f12, $f14
-0x3f 0x30 0x07 0x46 # CHECK: c.ngt.s $f6, $f7
-0x36 0x60 0x2e 0x46 # CHECK: c.ole.d $f12, $f14
-0x36 0x30 0x07 0x46 # CHECK: c.ole.s $f6, $f7
-0x34 0x60 0x2e 0x46 # CHECK: c.olt.d $f12, $f14
-0x34 0x30 0x07 0x46 # CHECK: c.olt.s $f6, $f7
-0x3a 0x60 0x2e 0x46 # CHECK: c.seq.d $f12, $f14
-0x3a 0x30 0x07 0x46 # CHECK: c.seq.s $f6, $f7
-0x38 0x60 0x2e 0x46 # CHECK: c.sf.d $f12, $f14
-0x38 0x30 0x07 0x46 # CHECK: c.sf.s $f6, $f7
-0x33 0x60 0x2e 0x46 # CHECK: c.ueq.d $f12, $f14
-0x33 0xe0 0x12 0x46 # CHECK: c.ueq.s $f28, $f18
-0x37 0x60 0x2e 0x46 # CHECK: c.ule.d $f12, $f14
-0x37 0x30 0x07 0x46 # CHECK: c.ule.s $f6, $f7
-0x35 0x60 0x2e 0x46 # CHECK: c.ult.d $f12, $f14
-0x35 0x30 0x07 0x46 # CHECK: c.ult.s $f6, $f7
-0x31 0x60 0x2e 0x46 # CHECK: c.un.d $f12, $f14
-0x31 0x30 0x07 0x46 # CHECK: c.un.s $f6, $f7
-0x0e 0x73 0x20 0x46 # CHECK: ceil.w.d $f12, $f14
-0x8e 0x39 0x00 0x46 # CHECK: ceil.w.s $f6, $f7
-0x00 0x38 0x46 0x44 # CHECK: cfc1 $6, $7
-0x21 0x30 0xe6 0x70 # CHECK: clo $6, $7
-0x20 0x30 0xe6 0x70 # CHECK: clz $6, $7
-0x00 0x38 0xc6 0x44 # CHECK: ctc1 $6, $7
-0xa1 0x39 0x00 0x46 # CHECK: cvt.d.s $f6, $f7
-0x21 0x73 0x80 0x46 # CHECK: cvt.d.w $f12, $f14
-0x25 0x73 0x20 0x46 # CHECK: cvt.l.d $f12, $f14
-0xa5 0x39 0x00 0x46 # CHECK: cvt.l.s $f6, $f7
-0x20 0x73 0x20 0x46 # CHECK: cvt.s.d $f12, $f14
-0xa0 0x39 0x80 0x46 # CHECK: cvt.s.w $f6, $f7
-0x24 0x73 0x20 0x46 # CHECK: cvt.w.d $f12, $f14
-0xa4 0x39 0x00 0x46 # CHECK: cvt.w.s $f6, $f7
-0x00 0x60 0x7e 0x41 # CHECK: di $fp
-0x00 0x60 0x60 0x41 # CHECK: di
-0x20 0x60 0x6e 0x41 # CHECK: ei $14
-0x20 0x60 0x60 0x41 # CHECK: ei
-0x0f 0x73 0x20 0x46 # CHECK: floor.w.d $f12, $f14
-0x8f 0x39 0x00 0x46 # CHECK: floor.w.s $f6, $f7
-0x84 0x61 0x33 0x7d # CHECK: ins $19, $9, 6, 7
-0x4c 0x01 0x00 0x08 # CHECK: j 1328
-0x4c 0x01 0x00 0x0c # CHECK: jal 1328
-0x4c 0x01 0x00 0x74 # CHECK: jalx 1328
-0x09 0xf8 0xe0 0x00 # CHECK: jalr $7
-0x09 0xfc 0x80 0x00 # CHECK: jalr.hb $4
-0x09 0x24 0xa0 0x00 # CHECK: jalr.hb $4, $5
-0x08 0x00 0xe0 0x00 # CHECK: jr $7
-0xc6 0x23 0xa4 0x80 # CHECK: lb $4, 9158($5)
-0x06 0x00 0xa4 0x90 # CHECK: lbu $4, 6($5)
-0xc6 0x23 0xe9 0xd4 # CHECK: ldc1 $f9, 9158($7)
-0x01 0x02 0xf7 0x4d # CHECK: ldxc1 $f8, $23($15)
-0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
-0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
-0xc6 0x23 0xe9 0xc0 # CHECK: ll $9, 9158($7)
-0x67 0x45 0x06 0x3c # CHECK: lui $6, 17767
-0x05 0x00 0xa6 0x4c # CHECK: luxc1 $f0, $6($5)
-0x18 0x00 0xa4 0x8c # CHECK: lw $4, 24($5)
-0xc6 0x23 0xe9 0xc4 # CHECK: lwc1 $f9, 9158($7)
-0x03 0x00 0x82 0x88 # CHECK: lwl $2, 3($4)
-0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
-0x00 0x05 0xcc 0x4d # CHECK: lwxc1 $f20, $12($14)
-0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
-0xa1 0xd4 0x94 0x4e # CHECK: madd.d $f18, $f20, $f26, $f20
-0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
-0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
-0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
-0x10 0x28 0x00 0x00 # CHECK: mfhi $5
-0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
-0x12 0x28 0x00 0x00 # CHECK: mflo $5
-0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
-0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
-0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
-0xa9 0xf2 0x52 0x4c # CHECK: msub.d $f10, $f2, $f30, $f18
-0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
-0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
-0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
-0x11 0x00 0xe0 0x00 # CHECK: mthi $7
-0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
-0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
-0x02 0x62 0x2e 0x46 # CHECK: mul.d $f8, $f12, $f14
-0x42 0x32 0x07 0x46 # CHECK: mul.s $f9, $f6, $f7
-0x02 0x48 0xc7 0x70 # CHECK: mul $9, $6, $7
-0x18 0x00 0x65 0x00 # CHECK: mult $3, $5
-0x19 0x00 0x65 0x00 # CHECK: multu $3, $5
-0x07 0x73 0x20 0x46 # CHECK: neg.d $f12, $f14
-0x87 0x39 0x00 0x46 # CHECK: neg.s $f6, $f7
-0xb1 0x74 0x54 0x4d # CHECK: nmadd.d $f18, $f10, $f14, $f20
-0x30 0xc8 0xac 0x4c # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x27 0x48 0xc7 0x00 # CHECK: nor $9, $6, $7
-0xb9 0x87 0x1e 0x4d # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x78 0x98 0x04 0x4f # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x25 0x18 0x65 0x00 # CHECK: or $3, $3, $5
-0x67 0x45 0xc9 0x34 # CHECK: ori $9, $6, 17767
-0xc2 0x49 0x26 0x00 # CHECK: rotr $9, $6, 7
-0x46 0x48 0xe6 0x00 # CHECK: rotrv $9, $6, $7
-0x0c 0x73 0x20 0x46 # CHECK: round.w.d $f12, $f14
-0x8c 0x39 0x00 0x46 # CHECK: round.w.s $f6, $f7
-0xc6 0x23 0xa4 0xa0 # CHECK: sb $4, 9158($5)
-0x06 0x00 0xa4 0xa0 # CHECK: sb $4, 6($5)
-0xc6 0x23 0xe9 0xe0 # CHECK: sc $9, 9158($7)
-0xc6 0x23 0xe9 0xf4 # CHECK: sdc1 $f9, 9158($7)
-0x09 0x40 0x24 0x4f # CHECK: sdxc1 $f8, $4($25)
-0x20 0x34 0x07 0x7c # CHECK: seb $6, $7
-0x20 0x36 0x07 0x7c # CHECK: seh $6, $7
-0xc6 0x23 0xa4 0xa4 # CHECK: sh $4, 9158($5)
-0xc0 0x21 0x03 0x00 # CHECK: sll $4, $3, 7
-0x04 0x10 0xa3 0x00 # CHECK: sllv $2, $3, $5
-0x2a 0x18 0x65 0x00 # CHECK: slt $3, $3, $5
-0x67 0x00 0x63 0x28 # CHECK: slti $3, $3, 103
-0x67 0x00 0x63 0x2c # CHECK: sltiu $3, $3, 103
-0x2b 0x18 0x65 0x00 # CHECK: sltu $3, $3, $5
-0x04 0x73 0x20 0x46 # CHECK: sqrt.d $f12, $f14
-0x84 0x39 0x00 0x46 # CHECK: sqrt.s $f6, $f7
-0xc3 0x21 0x03 0x00 # CHECK: sra $4, $3, 7
-0x07 0x10 0xa3 0x00 # CHECK: srav $2, $3, $5
-0xc2 0x21 0x03 0x00 # CHECK: srl $4, $3, 7
-0x06 0x10 0xa3 0x00 # CHECK: srlv $2, $3, $5
-0x01 0x62 0x2e 0x46 # CHECK: sub.d $f8, $f12, $f14
-0x41 0x32 0x07 0x46 # CHECK: sub.s $f9, $f6, $f7
-0x22 0x48 0xc7 0x00 # CHECK: sub $9, $6, $7
-0x23 0x20 0x65 0x00 # CHECK: subu $4, $3, $5
-0x0d 0x20 0xb8 0x4c # CHECK: suxc1 $f4, $24($5)
-0x18 0x00 0xa4 0xac # CHECK: sw $4, 24($5)
-0xc6 0x23 0xe9 0xe4 # CHECK: swc1 $f9, 9158($7)
-0x10 0x00 0xa4 0xa8 # CHECK: swl $4, 16($5)
-0x10 0x00 0xe6 0xb8 # CHECK: swr $6, 16($7)
-0x08 0xd0 0xd2 0x4e # CHECK: swxc1 $f26, $18($22)
-0xcf 0x01 0x00 0x00 # CHECK: sync 7
-0x0d 0x73 0x20 0x46 # CHECK: trunc.w.d $f12, $f14
-0x8d 0x39 0x00 0x46 # CHECK: trunc.w.s $f6, $f7
-0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
-0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
-0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
index 9637835..d013847 100644
--- a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
@@ -2,171 +2,175 @@
 # Try a mips64* triple to confirm that mips* vs mips64* triples no longer have
 # an effect on the disassembler behaviour.
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips32r2 | FileCheck %s
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x07 0xdf 0xe8 0x07 # CHECK: synci -6137($fp)
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
 0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
-0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
-0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x41 0x60 0x60 0x00 # CHECK: di
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
-0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
-0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
-0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
-0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
 0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
-0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
-0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
+0x4d 0x54 0x74 0xb1 # CHECK: nmadd.d $f18, $f10, $f14, $f20
 0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
-0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
+0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
 0x4e 0x94 0xd4 0xa1 # CHECK: madd.d $f18, $f20, $f26, $f20
+0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
+0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
 0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
 0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
 0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
-0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
 0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x4d 0x54 0x74 0xb1 # CHECK: nmadd.d $f18, $f10, $f14, $f20
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
-0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
-0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
-0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
+0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
 0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
 0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
+0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
 0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt
index 81a05b3..faaed7c 100644
--- a/test/MC/Disassembler/Mips/mips32r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips32r2_le.txt
@@ -269,6 +269,9 @@
 # CHECK: maddu  $6,  $7
 0x01 0x00 0xc7 0x70
 
+# CHECK: mfc0 $8, $16, 4
+0x04 0x80 0x08 0x40
+
 # CHECK: mfc1   $6, $f7
 0x00 0x38 0x06 0x44
 
@@ -290,6 +293,9 @@
 # CHECK: msubu  $6,  $7
 0x05 0x00 0xc7 0x70
 
+# CHECK: mtc0 $9, $15, 1
+0x01 0x78 0x89 0x40
+
 # CHECK: mtc1   $6, $f7
 0x00 0x38 0x86 0x44
 
diff --git a/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-el.txt b/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-el.txt
new file mode 100644
index 0000000..37c14de
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-el.txt
@@ -0,0 +1,171 @@
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r3 | FileCheck %s
+0x05 0x73 0x20 0x46 # CHECK: abs.d $f12, $f14
+0x85 0x39 0x00 0x46 # CHECK: abs.s $f6, $f7
+0x20 0x48 0xc7 0x00 # CHECK: add $9, $6, $7
+0x00 0x62 0x2e 0x46 # CHECK: add.d $f8, $f12, $f14
+0x40 0x32 0x07 0x46 # CHECK: add.s $f9, $f6, $f7
+0x67 0x45 0xc9 0x20 # CHECK: addi $9, $6, 17767
+0x67 0xc5 0xc9 0x24 # CHECK: addiu $9, $6, -15001
+0x21 0x48 0xc7 0x00 # CHECK: addu $9, $6, $7
+0x24 0x48 0xc7 0x00 # CHECK: and $9, $6, $7
+0x67 0x45 0xc9 0x30 # CHECK: andi $9, $6, 17767
+0x4c 0x01 0x00 0x10 # CHECK: b 1332
+0x4c 0x01 0x00 0x45 # CHECK: bc1f 1332
+0x4c 0x01 0x1c 0x45 # CHECK: bc1f $fcc7, 1332
+0x4c 0x01 0x01 0x45 # CHECK: bc1t 1332
+0x4c 0x01 0x1d 0x45 # CHECK: bc1t $fcc7, 1332
+0x4c 0x01 0x26 0x11 # CHECK: beq $9, $6, 1332
+0x4c 0x01 0xc1 0x04 # CHECK: bgez $6, 1332
+0x4c 0x01 0xd1 0x04 # CHECK: bgezal $6, 1332
+0x4c 0x01 0xc0 0x1c # CHECK: bgtz $6, 1332
+0x4c 0x01 0xc0 0x18 # CHECK: blez $6, 1332
+0x4c 0x01 0x26 0x15 # CHECK: bne $9, $6, 1332
+0x32 0x60 0x2e 0x46 # CHECK: c.eq.d $f12, $f14
+0x32 0x30 0x07 0x46 # CHECK: c.eq.s $f6, $f7
+0x30 0x60 0x2e 0x46 # CHECK: c.f.d $f12, $f14
+0x30 0x30 0x07 0x46 # CHECK: c.f.s $f6, $f7
+0x3e 0x60 0x2e 0x46 # CHECK: c.le.d $f12, $f14
+0x3e 0x30 0x07 0x46 # CHECK: c.le.s $f6, $f7
+0x3c 0x60 0x2e 0x46 # CHECK: c.lt.d $f12, $f14
+0x3c 0x30 0x07 0x46 # CHECK: c.lt.s $f6, $f7
+0x3d 0x60 0x2e 0x46 # CHECK: c.nge.d $f12, $f14
+0x3d 0x30 0x07 0x46 # CHECK: c.nge.s $f6, $f7
+0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.d $f12, $f14
+0x3b 0x30 0x07 0x46 # CHECK: c.ngl.s $f6, $f7
+0x39 0x60 0x2e 0x46 # CHECK: c.ngle.d $f12, $f14
+0x39 0x30 0x07 0x46 # CHECK: c.ngle.s $f6, $f7
+0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.d $f12, $f14
+0x3f 0x30 0x07 0x46 # CHECK: c.ngt.s $f6, $f7
+0x36 0x60 0x2e 0x46 # CHECK: c.ole.d $f12, $f14
+0x36 0x30 0x07 0x46 # CHECK: c.ole.s $f6, $f7
+0x34 0x60 0x2e 0x46 # CHECK: c.olt.d $f12, $f14
+0x34 0x30 0x07 0x46 # CHECK: c.olt.s $f6, $f7
+0x3a 0x60 0x2e 0x46 # CHECK: c.seq.d $f12, $f14
+0x3a 0x30 0x07 0x46 # CHECK: c.seq.s $f6, $f7
+0x38 0x60 0x2e 0x46 # CHECK: c.sf.d $f12, $f14
+0x38 0x30 0x07 0x46 # CHECK: c.sf.s $f6, $f7
+0x33 0x60 0x2e 0x46 # CHECK: c.ueq.d $f12, $f14
+0x33 0xe0 0x12 0x46 # CHECK: c.ueq.s $f28, $f18
+0x37 0x60 0x2e 0x46 # CHECK: c.ule.d $f12, $f14
+0x37 0x30 0x07 0x46 # CHECK: c.ule.s $f6, $f7
+0x35 0x60 0x2e 0x46 # CHECK: c.ult.d $f12, $f14
+0x35 0x30 0x07 0x46 # CHECK: c.ult.s $f6, $f7
+0x31 0x60 0x2e 0x46 # CHECK: c.un.d $f12, $f14
+0x31 0x30 0x07 0x46 # CHECK: c.un.s $f6, $f7
+0x0e 0x73 0x20 0x46 # CHECK: ceil.w.d $f12, $f14
+0x8e 0x39 0x00 0x46 # CHECK: ceil.w.s $f6, $f7
+0x00 0x38 0x46 0x44 # CHECK: cfc1 $6, $7
+0x21 0x30 0xe6 0x70 # CHECK: clo $6, $7
+0x20 0x30 0xe6 0x70 # CHECK: clz $6, $7
+0x00 0x38 0xc6 0x44 # CHECK: ctc1 $6, $7
+0xa1 0x39 0x00 0x46 # CHECK: cvt.d.s $f6, $f7
+0x21 0x73 0x80 0x46 # CHECK: cvt.d.w $f12, $f14
+0x25 0x73 0x20 0x46 # CHECK: cvt.l.d $f12, $f14
+0xa5 0x39 0x00 0x46 # CHECK: cvt.l.s $f6, $f7
+0x20 0x73 0x20 0x46 # CHECK: cvt.s.d $f12, $f14
+0xa0 0x39 0x80 0x46 # CHECK: cvt.s.w $f6, $f7
+0x24 0x73 0x20 0x46 # CHECK: cvt.w.d $f12, $f14
+0xa4 0x39 0x00 0x46 # CHECK: cvt.w.s $f6, $f7
+0x00 0x60 0x7e 0x41 # CHECK: di $fp
+0x00 0x60 0x60 0x41 # CHECK: di
+0x20 0x60 0x6e 0x41 # CHECK: ei $14
+0x20 0x60 0x60 0x41 # CHECK: ei
+0x0f 0x73 0x20 0x46 # CHECK: floor.w.d $f12, $f14
+0x8f 0x39 0x00 0x46 # CHECK: floor.w.s $f6, $f7
+0x84 0x61 0x33 0x7d # CHECK: ins $19, $9, 6, 7
+0x4c 0x01 0x00 0x08 # CHECK: j 1328
+0x4c 0x01 0x00 0x0c # CHECK: jal 1328
+0x4c 0x01 0x00 0x74 # CHECK: jalx 1328
+0x09 0xf8 0xe0 0x00 # CHECK: jalr $7
+0x09 0xfc 0x80 0x00 # CHECK: jalr.hb $4
+0x09 0x24 0xa0 0x00 # CHECK: jalr.hb $4, $5
+0x08 0x00 0xe0 0x00 # CHECK: jr $7
+0xc6 0x23 0xa4 0x80 # CHECK: lb $4, 9158($5)
+0x06 0x00 0xa4 0x90 # CHECK: lbu $4, 6($5)
+0xc6 0x23 0xe9 0xd4 # CHECK: ldc1 $f9, 9158($7)
+0x01 0x02 0xf7 0x4d # CHECK: ldxc1 $f8, $23($15)
+0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
+0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
+0xc6 0x23 0xe9 0xc0 # CHECK: ll $9, 9158($7)
+0x67 0x45 0x06 0x3c # CHECK: lui $6, 17767
+0x05 0x00 0xa6 0x4c # CHECK: luxc1 $f0, $6($5)
+0x18 0x00 0xa4 0x8c # CHECK: lw $4, 24($5)
+0xc6 0x23 0xe9 0xc4 # CHECK: lwc1 $f9, 9158($7)
+0x03 0x00 0x82 0x88 # CHECK: lwl $2, 3($4)
+0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
+0x00 0x05 0xcc 0x4d # CHECK: lwxc1 $f20, $12($14)
+0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
+0xa1 0xd4 0x94 0x4e # CHECK: madd.d $f18, $f20, $f26, $f20
+0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
+0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
+0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
+0x10 0x28 0x00 0x00 # CHECK: mfhi $5
+0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
+0x12 0x28 0x00 0x00 # CHECK: mflo $5
+0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
+0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
+0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
+0xa9 0xf2 0x52 0x4c # CHECK: msub.d $f10, $f2, $f30, $f18
+0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
+0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
+0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
+0x11 0x00 0xe0 0x00 # CHECK: mthi $7
+0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
+0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
+0x02 0x62 0x2e 0x46 # CHECK: mul.d $f8, $f12, $f14
+0x42 0x32 0x07 0x46 # CHECK: mul.s $f9, $f6, $f7
+0x02 0x48 0xc7 0x70 # CHECK: mul $9, $6, $7
+0x18 0x00 0x65 0x00 # CHECK: mult $3, $5
+0x19 0x00 0x65 0x00 # CHECK: multu $3, $5
+0x07 0x73 0x20 0x46 # CHECK: neg.d $f12, $f14
+0x87 0x39 0x00 0x46 # CHECK: neg.s $f6, $f7
+0xb1 0x74 0x54 0x4d # CHECK: nmadd.d $f18, $f10, $f14, $f20
+0x30 0xc8 0xac 0x4c # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x27 0x48 0xc7 0x00 # CHECK: nor $9, $6, $7
+0xb9 0x87 0x1e 0x4d # CHECK: nmsub.d $f30, $f8, $f16, $f30
+0x78 0x98 0x04 0x4f # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x25 0x18 0x65 0x00 # CHECK: or $3, $3, $5
+0x67 0x45 0xc9 0x34 # CHECK: ori $9, $6, 17767
+0xc2 0x49 0x26 0x00 # CHECK: rotr $9, $6, 7
+0x46 0x48 0xe6 0x00 # CHECK: rotrv $9, $6, $7
+0x0c 0x73 0x20 0x46 # CHECK: round.w.d $f12, $f14
+0x8c 0x39 0x00 0x46 # CHECK: round.w.s $f6, $f7
+0xc6 0x23 0xa4 0xa0 # CHECK: sb $4, 9158($5)
+0x06 0x00 0xa4 0xa0 # CHECK: sb $4, 6($5)
+0xc6 0x23 0xe9 0xe0 # CHECK: sc $9, 9158($7)
+0xc6 0x23 0xe9 0xf4 # CHECK: sdc1 $f9, 9158($7)
+0x09 0x40 0x24 0x4f # CHECK: sdxc1 $f8, $4($25)
+0x20 0x34 0x07 0x7c # CHECK: seb $6, $7
+0x20 0x36 0x07 0x7c # CHECK: seh $6, $7
+0xc6 0x23 0xa4 0xa4 # CHECK: sh $4, 9158($5)
+0xc0 0x21 0x03 0x00 # CHECK: sll $4, $3, 7
+0x04 0x10 0xa3 0x00 # CHECK: sllv $2, $3, $5
+0x2a 0x18 0x65 0x00 # CHECK: slt $3, $3, $5
+0x67 0x00 0x63 0x28 # CHECK: slti $3, $3, 103
+0x67 0x00 0x63 0x2c # CHECK: sltiu $3, $3, 103
+0x2b 0x18 0x65 0x00 # CHECK: sltu $3, $3, $5
+0x04 0x73 0x20 0x46 # CHECK: sqrt.d $f12, $f14
+0x84 0x39 0x00 0x46 # CHECK: sqrt.s $f6, $f7
+0xc3 0x21 0x03 0x00 # CHECK: sra $4, $3, 7
+0x07 0x10 0xa3 0x00 # CHECK: srav $2, $3, $5
+0xc2 0x21 0x03 0x00 # CHECK: srl $4, $3, 7
+0x06 0x10 0xa3 0x00 # CHECK: srlv $2, $3, $5
+0x01 0x62 0x2e 0x46 # CHECK: sub.d $f8, $f12, $f14
+0x41 0x32 0x07 0x46 # CHECK: sub.s $f9, $f6, $f7
+0x22 0x48 0xc7 0x00 # CHECK: sub $9, $6, $7
+0x23 0x20 0x65 0x00 # CHECK: subu $4, $3, $5
+0x0d 0x20 0xb8 0x4c # CHECK: suxc1 $f4, $24($5)
+0x18 0x00 0xa4 0xac # CHECK: sw $4, 24($5)
+0xc6 0x23 0xe9 0xe4 # CHECK: swc1 $f9, 9158($7)
+0x10 0x00 0xa4 0xa8 # CHECK: swl $4, 16($5)
+0x10 0x00 0xe6 0xb8 # CHECK: swr $6, 16($7)
+0x08 0xd0 0xd2 0x4e # CHECK: swxc1 $f26, $18($22)
+0xcf 0x01 0x00 0x00 # CHECK: sync 7
+0x0d 0x73 0x20 0x46 # CHECK: trunc.w.d $f12, $f14
+0x8d 0x39 0x00 0x46 # CHECK: trunc.w.s $f6, $f7
+0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
+0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
+0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-le.txt b/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-le.txt
deleted file mode 100644
index 1909e2a..0000000
--- a/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3-le.txt
+++ /dev/null
@@ -1,169 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r3 | FileCheck %s
-0x05 0x73 0x20 0x46 # CHECK: abs.d $f12, $f14
-0x85 0x39 0x00 0x46 # CHECK: abs.s $f6, $f7
-0x20 0x48 0xc7 0x00 # CHECK: add $9, $6, $7
-0x00 0x62 0x2e 0x46 # CHECK: add.d $f8, $f12, $f14
-0x40 0x32 0x07 0x46 # CHECK: add.s $f9, $f6, $f7
-0x67 0x45 0xc9 0x20 # CHECK: addi $9, $6, 17767
-0x67 0xc5 0xc9 0x24 # CHECK: addiu $9, $6, -15001
-0x21 0x48 0xc7 0x00 # CHECK: addu $9, $6, $7
-0x24 0x48 0xc7 0x00 # CHECK: and $9, $6, $7
-0x67 0x45 0xc9 0x30 # CHECK: andi $9, $6, 17767
-0x4c 0x01 0x00 0x10 # CHECK: b 1332
-0x4c 0x01 0x00 0x45 # CHECK: bc1f 1332
-0x4c 0x01 0x1c 0x45 # CHECK: bc1f $fcc7, 1332
-0x4c 0x01 0x01 0x45 # CHECK: bc1t 1332
-0x4c 0x01 0x1d 0x45 # CHECK: bc1t $fcc7, 1332
-0x4c 0x01 0x26 0x11 # CHECK: beq $9, $6, 1332
-0x4c 0x01 0xc1 0x04 # CHECK: bgez $6, 1332
-0x4c 0x01 0xd1 0x04 # CHECK: bgezal $6, 1332
-0x4c 0x01 0xc0 0x1c # CHECK: bgtz $6, 1332
-0x4c 0x01 0xc0 0x18 # CHECK: blez $6, 1332
-0x4c 0x01 0x26 0x15 # CHECK: bne $9, $6, 1332
-0x32 0x60 0x2e 0x46 # CHECK: c.eq.d $f12, $f14
-0x32 0x30 0x07 0x46 # CHECK: c.eq.s $f6, $f7
-0x30 0x60 0x2e 0x46 # CHECK: c.f.d $f12, $f14
-0x30 0x30 0x07 0x46 # CHECK: c.f.s $f6, $f7
-0x3e 0x60 0x2e 0x46 # CHECK: c.le.d $f12, $f14
-0x3e 0x30 0x07 0x46 # CHECK: c.le.s $f6, $f7
-0x3c 0x60 0x2e 0x46 # CHECK: c.lt.d $f12, $f14
-0x3c 0x30 0x07 0x46 # CHECK: c.lt.s $f6, $f7
-0x3d 0x60 0x2e 0x46 # CHECK: c.nge.d $f12, $f14
-0x3d 0x30 0x07 0x46 # CHECK: c.nge.s $f6, $f7
-0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.d $f12, $f14
-0x3b 0x30 0x07 0x46 # CHECK: c.ngl.s $f6, $f7
-0x39 0x60 0x2e 0x46 # CHECK: c.ngle.d $f12, $f14
-0x39 0x30 0x07 0x46 # CHECK: c.ngle.s $f6, $f7
-0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.d $f12, $f14
-0x3f 0x30 0x07 0x46 # CHECK: c.ngt.s $f6, $f7
-0x36 0x60 0x2e 0x46 # CHECK: c.ole.d $f12, $f14
-0x36 0x30 0x07 0x46 # CHECK: c.ole.s $f6, $f7
-0x34 0x60 0x2e 0x46 # CHECK: c.olt.d $f12, $f14
-0x34 0x30 0x07 0x46 # CHECK: c.olt.s $f6, $f7
-0x3a 0x60 0x2e 0x46 # CHECK: c.seq.d $f12, $f14
-0x3a 0x30 0x07 0x46 # CHECK: c.seq.s $f6, $f7
-0x38 0x60 0x2e 0x46 # CHECK: c.sf.d $f12, $f14
-0x38 0x30 0x07 0x46 # CHECK: c.sf.s $f6, $f7
-0x33 0x60 0x2e 0x46 # CHECK: c.ueq.d $f12, $f14
-0x33 0xe0 0x12 0x46 # CHECK: c.ueq.s $f28, $f18
-0x37 0x60 0x2e 0x46 # CHECK: c.ule.d $f12, $f14
-0x37 0x30 0x07 0x46 # CHECK: c.ule.s $f6, $f7
-0x35 0x60 0x2e 0x46 # CHECK: c.ult.d $f12, $f14
-0x35 0x30 0x07 0x46 # CHECK: c.ult.s $f6, $f7
-0x31 0x60 0x2e 0x46 # CHECK: c.un.d $f12, $f14
-0x31 0x30 0x07 0x46 # CHECK: c.un.s $f6, $f7
-0x0e 0x73 0x20 0x46 # CHECK: ceil.w.d $f12, $f14
-0x8e 0x39 0x00 0x46 # CHECK: ceil.w.s $f6, $f7
-0x00 0x38 0x46 0x44 # CHECK: cfc1 $6, $7
-0x21 0x30 0xe6 0x70 # CHECK: clo $6, $7
-0x20 0x30 0xe6 0x70 # CHECK: clz $6, $7
-0x00 0x38 0xc6 0x44 # CHECK: ctc1 $6, $7
-0xa1 0x39 0x00 0x46 # CHECK: cvt.d.s $f6, $f7
-0x21 0x73 0x80 0x46 # CHECK: cvt.d.w $f12, $f14
-0x25 0x73 0x20 0x46 # CHECK: cvt.l.d $f12, $f14
-0xa5 0x39 0x00 0x46 # CHECK: cvt.l.s $f6, $f7
-0x20 0x73 0x20 0x46 # CHECK: cvt.s.d $f12, $f14
-0xa0 0x39 0x80 0x46 # CHECK: cvt.s.w $f6, $f7
-0x24 0x73 0x20 0x46 # CHECK: cvt.w.d $f12, $f14
-0xa4 0x39 0x00 0x46 # CHECK: cvt.w.s $f6, $f7
-0x00 0x60 0x7e 0x41 # CHECK: di $fp
-0x00 0x60 0x60 0x41 # CHECK: di
-0x20 0x60 0x6e 0x41 # CHECK: ei $14
-0x20 0x60 0x60 0x41 # CHECK: ei
-0x0f 0x73 0x20 0x46 # CHECK: floor.w.d $f12, $f14
-0x8f 0x39 0x00 0x46 # CHECK: floor.w.s $f6, $f7
-0x84 0x61 0x33 0x7d # CHECK: ins $19, $9, 6, 7
-0x4c 0x01 0x00 0x08 # CHECK: j 1328
-0x4c 0x01 0x00 0x0c # CHECK: jal 1328
-0x4c 0x01 0x00 0x74 # CHECK: jalx 1328
-0x09 0xf8 0xe0 0x00 # CHECK: jalr $7
-0x09 0xfc 0x80 0x00 # CHECK: jalr.hb $4
-0x09 0x24 0xa0 0x00 # CHECK: jalr.hb $4, $5
-0x08 0x00 0xe0 0x00 # CHECK: jr $7
-0xc6 0x23 0xa4 0x80 # CHECK: lb $4, 9158($5)
-0x06 0x00 0xa4 0x90 # CHECK: lbu $4, 6($5)
-0xc6 0x23 0xe9 0xd4 # CHECK: ldc1 $f9, 9158($7)
-0x01 0x02 0xf7 0x4d # CHECK: ldxc1 $f8, $23($15)
-0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
-0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
-0xc6 0x23 0xe9 0xc0 # CHECK: ll $9, 9158($7)
-0x67 0x45 0x06 0x3c # CHECK: lui $6, 17767
-0x05 0x00 0xa6 0x4c # CHECK: luxc1 $f0, $6($5)
-0x18 0x00 0xa4 0x8c # CHECK: lw $4, 24($5)
-0xc6 0x23 0xe9 0xc4 # CHECK: lwc1 $f9, 9158($7)
-0x03 0x00 0x82 0x88 # CHECK: lwl $2, 3($4)
-0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
-0x00 0x05 0xcc 0x4d # CHECK: lwxc1 $f20, $12($14)
-0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
-0xa1 0xd4 0x94 0x4e # CHECK: madd.d $f18, $f20, $f26, $f20
-0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
-0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
-0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
-0x10 0x28 0x00 0x00 # CHECK: mfhi $5
-0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
-0x12 0x28 0x00 0x00 # CHECK: mflo $5
-0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
-0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
-0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
-0xa9 0xf2 0x52 0x4c # CHECK: msub.d $f10, $f2, $f30, $f18
-0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
-0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
-0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
-0x11 0x00 0xe0 0x00 # CHECK: mthi $7
-0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
-0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
-0x02 0x62 0x2e 0x46 # CHECK: mul.d $f8, $f12, $f14
-0x42 0x32 0x07 0x46 # CHECK: mul.s $f9, $f6, $f7
-0x02 0x48 0xc7 0x70 # CHECK: mul $9, $6, $7
-0x18 0x00 0x65 0x00 # CHECK: mult $3, $5
-0x19 0x00 0x65 0x00 # CHECK: multu $3, $5
-0x07 0x73 0x20 0x46 # CHECK: neg.d $f12, $f14
-0x87 0x39 0x00 0x46 # CHECK: neg.s $f6, $f7
-0xb1 0x74 0x54 0x4d # CHECK: nmadd.d $f18, $f10, $f14, $f20
-0x30 0xc8 0xac 0x4c # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x27 0x48 0xc7 0x00 # CHECK: nor $9, $6, $7
-0xb9 0x87 0x1e 0x4d # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x78 0x98 0x04 0x4f # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x25 0x18 0x65 0x00 # CHECK: or $3, $3, $5
-0x67 0x45 0xc9 0x34 # CHECK: ori $9, $6, 17767
-0xc2 0x49 0x26 0x00 # CHECK: rotr $9, $6, 7
-0x46 0x48 0xe6 0x00 # CHECK: rotrv $9, $6, $7
-0x0c 0x73 0x20 0x46 # CHECK: round.w.d $f12, $f14
-0x8c 0x39 0x00 0x46 # CHECK: round.w.s $f6, $f7
-0xc6 0x23 0xa4 0xa0 # CHECK: sb $4, 9158($5)
-0x06 0x00 0xa4 0xa0 # CHECK: sb $4, 6($5)
-0xc6 0x23 0xe9 0xe0 # CHECK: sc $9, 9158($7)
-0xc6 0x23 0xe9 0xf4 # CHECK: sdc1 $f9, 9158($7)
-0x09 0x40 0x24 0x4f # CHECK: sdxc1 $f8, $4($25)
-0x20 0x34 0x07 0x7c # CHECK: seb $6, $7
-0x20 0x36 0x07 0x7c # CHECK: seh $6, $7
-0xc6 0x23 0xa4 0xa4 # CHECK: sh $4, 9158($5)
-0xc0 0x21 0x03 0x00 # CHECK: sll $4, $3, 7
-0x04 0x10 0xa3 0x00 # CHECK: sllv $2, $3, $5
-0x2a 0x18 0x65 0x00 # CHECK: slt $3, $3, $5
-0x67 0x00 0x63 0x28 # CHECK: slti $3, $3, 103
-0x67 0x00 0x63 0x2c # CHECK: sltiu $3, $3, 103
-0x2b 0x18 0x65 0x00 # CHECK: sltu $3, $3, $5
-0x04 0x73 0x20 0x46 # CHECK: sqrt.d $f12, $f14
-0x84 0x39 0x00 0x46 # CHECK: sqrt.s $f6, $f7
-0xc3 0x21 0x03 0x00 # CHECK: sra $4, $3, 7
-0x07 0x10 0xa3 0x00 # CHECK: srav $2, $3, $5
-0xc2 0x21 0x03 0x00 # CHECK: srl $4, $3, 7
-0x06 0x10 0xa3 0x00 # CHECK: srlv $2, $3, $5
-0x01 0x62 0x2e 0x46 # CHECK: sub.d $f8, $f12, $f14
-0x41 0x32 0x07 0x46 # CHECK: sub.s $f9, $f6, $f7
-0x22 0x48 0xc7 0x00 # CHECK: sub $9, $6, $7
-0x23 0x20 0x65 0x00 # CHECK: subu $4, $3, $5
-0x0d 0x20 0xb8 0x4c # CHECK: suxc1 $f4, $24($5)
-0x18 0x00 0xa4 0xac # CHECK: sw $4, 24($5)
-0xc6 0x23 0xe9 0xe4 # CHECK: swc1 $f9, 9158($7)
-0x10 0x00 0xa4 0xa8 # CHECK: swl $4, 16($5)
-0x10 0x00 0xe6 0xb8 # CHECK: swr $6, 16($7)
-0x08 0xd0 0xd2 0x4e # CHECK: swxc1 $f26, $18($22)
-0xcf 0x01 0x00 0x00 # CHECK: sync 7
-0x0d 0x73 0x20 0x46 # CHECK: trunc.w.d $f12, $f14
-0x8d 0x39 0x00 0x46 # CHECK: trunc.w.s $f6, $f7
-0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
-0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
-0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt b/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt
index a273c24..cf9e986 100644
--- a/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt
+++ b/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt
@@ -1,169 +1,171 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r3 | FileCheck %s
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
 0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
-0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
-0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x41 0x60 0x60 0x00 # CHECK: di
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
-0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
-0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
-0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
-0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
 0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
-0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
-0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
+0x4d 0x54 0x74 0xb1 # CHECK: nmadd.d $f18, $f10, $f14, $f20
 0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
-0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
+0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
 0x4e 0x94 0xd4 0xa1 # CHECK: madd.d $f18, $f20, $f26, $f20
+0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
+0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
 0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
 0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
 0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
-0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
 0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x4d 0x54 0x74 0xb1 # CHECK: nmadd.d $f18, $f10, $f14, $f20
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
-0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
-0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
-0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
+0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
 0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
 0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
+0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
 0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
diff --git a/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-el.txt b/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-el.txt
new file mode 100644
index 0000000..b68089b
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-el.txt
@@ -0,0 +1,171 @@
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r5 | FileCheck %s
+0x05 0x73 0x20 0x46 # CHECK: abs.d $f12, $f14
+0x85 0x39 0x00 0x46 # CHECK: abs.s $f6, $f7
+0x20 0x48 0xc7 0x00 # CHECK: add $9, $6, $7
+0x00 0x62 0x2e 0x46 # CHECK: add.d $f8, $f12, $f14
+0x40 0x32 0x07 0x46 # CHECK: add.s $f9, $f6, $f7
+0x67 0x45 0xc9 0x20 # CHECK: addi $9, $6, 17767
+0x67 0xc5 0xc9 0x24 # CHECK: addiu $9, $6, -15001
+0x21 0x48 0xc7 0x00 # CHECK: addu $9, $6, $7
+0x24 0x48 0xc7 0x00 # CHECK: and $9, $6, $7
+0x67 0x45 0xc9 0x30 # CHECK: andi $9, $6, 17767
+0x4c 0x01 0x00 0x10 # CHECK: b 1332
+0x4c 0x01 0x00 0x45 # CHECK: bc1f 1332
+0x4c 0x01 0x1c 0x45 # CHECK: bc1f $fcc7, 1332
+0x4c 0x01 0x01 0x45 # CHECK: bc1t 1332
+0x4c 0x01 0x1d 0x45 # CHECK: bc1t $fcc7, 1332
+0x4c 0x01 0x26 0x11 # CHECK: beq $9, $6, 1332
+0x4c 0x01 0xc1 0x04 # CHECK: bgez $6, 1332
+0x4c 0x01 0xd1 0x04 # CHECK: bgezal $6, 1332
+0x4c 0x01 0xc0 0x1c # CHECK: bgtz $6, 1332
+0x4c 0x01 0xc0 0x18 # CHECK: blez $6, 1332
+0x4c 0x01 0x26 0x15 # CHECK: bne $9, $6, 1332
+0x32 0x60 0x2e 0x46 # CHECK: c.eq.d $f12, $f14
+0x32 0x30 0x07 0x46 # CHECK: c.eq.s $f6, $f7
+0x30 0x60 0x2e 0x46 # CHECK: c.f.d $f12, $f14
+0x30 0x30 0x07 0x46 # CHECK: c.f.s $f6, $f7
+0x3e 0x60 0x2e 0x46 # CHECK: c.le.d $f12, $f14
+0x3e 0x30 0x07 0x46 # CHECK: c.le.s $f6, $f7
+0x3c 0x60 0x2e 0x46 # CHECK: c.lt.d $f12, $f14
+0x3c 0x30 0x07 0x46 # CHECK: c.lt.s $f6, $f7
+0x3d 0x60 0x2e 0x46 # CHECK: c.nge.d $f12, $f14
+0x3d 0x30 0x07 0x46 # CHECK: c.nge.s $f6, $f7
+0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.d $f12, $f14
+0x3b 0x30 0x07 0x46 # CHECK: c.ngl.s $f6, $f7
+0x39 0x60 0x2e 0x46 # CHECK: c.ngle.d $f12, $f14
+0x39 0x30 0x07 0x46 # CHECK: c.ngle.s $f6, $f7
+0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.d $f12, $f14
+0x3f 0x30 0x07 0x46 # CHECK: c.ngt.s $f6, $f7
+0x36 0x60 0x2e 0x46 # CHECK: c.ole.d $f12, $f14
+0x36 0x30 0x07 0x46 # CHECK: c.ole.s $f6, $f7
+0x34 0x60 0x2e 0x46 # CHECK: c.olt.d $f12, $f14
+0x34 0x30 0x07 0x46 # CHECK: c.olt.s $f6, $f7
+0x3a 0x60 0x2e 0x46 # CHECK: c.seq.d $f12, $f14
+0x3a 0x30 0x07 0x46 # CHECK: c.seq.s $f6, $f7
+0x38 0x60 0x2e 0x46 # CHECK: c.sf.d $f12, $f14
+0x38 0x30 0x07 0x46 # CHECK: c.sf.s $f6, $f7
+0x33 0x60 0x2e 0x46 # CHECK: c.ueq.d $f12, $f14
+0x33 0xe0 0x12 0x46 # CHECK: c.ueq.s $f28, $f18
+0x37 0x60 0x2e 0x46 # CHECK: c.ule.d $f12, $f14
+0x37 0x30 0x07 0x46 # CHECK: c.ule.s $f6, $f7
+0x35 0x60 0x2e 0x46 # CHECK: c.ult.d $f12, $f14
+0x35 0x30 0x07 0x46 # CHECK: c.ult.s $f6, $f7
+0x31 0x60 0x2e 0x46 # CHECK: c.un.d $f12, $f14
+0x31 0x30 0x07 0x46 # CHECK: c.un.s $f6, $f7
+0x0e 0x73 0x20 0x46 # CHECK: ceil.w.d $f12, $f14
+0x8e 0x39 0x00 0x46 # CHECK: ceil.w.s $f6, $f7
+0x00 0x38 0x46 0x44 # CHECK: cfc1 $6, $7
+0x21 0x30 0xe6 0x70 # CHECK: clo $6, $7
+0x20 0x30 0xe6 0x70 # CHECK: clz $6, $7
+0x00 0x38 0xc6 0x44 # CHECK: ctc1 $6, $7
+0xa1 0x39 0x00 0x46 # CHECK: cvt.d.s $f6, $f7
+0x21 0x73 0x80 0x46 # CHECK: cvt.d.w $f12, $f14
+0x25 0x73 0x20 0x46 # CHECK: cvt.l.d $f12, $f14
+0xa5 0x39 0x00 0x46 # CHECK: cvt.l.s $f6, $f7
+0x20 0x73 0x20 0x46 # CHECK: cvt.s.d $f12, $f14
+0xa0 0x39 0x80 0x46 # CHECK: cvt.s.w $f6, $f7
+0x24 0x73 0x20 0x46 # CHECK: cvt.w.d $f12, $f14
+0xa4 0x39 0x00 0x46 # CHECK: cvt.w.s $f6, $f7
+0x00 0x60 0x7e 0x41 # CHECK: di $fp
+0x00 0x60 0x60 0x41 # CHECK: di
+0x20 0x60 0x6e 0x41 # CHECK: ei $14
+0x20 0x60 0x60 0x41 # CHECK: ei
+0x0f 0x73 0x20 0x46 # CHECK: floor.w.d $f12, $f14
+0x8f 0x39 0x00 0x46 # CHECK: floor.w.s $f6, $f7
+0x84 0x61 0x33 0x7d # CHECK: ins $19, $9, 6, 7
+0x4c 0x01 0x00 0x08 # CHECK: j 1328
+0x4c 0x01 0x00 0x0c # CHECK: jal 1328
+0x4c 0x01 0x00 0x74 # CHECK: jalx 1328
+0x09 0xf8 0xe0 0x00 # CHECK: jalr $7
+0x09 0xfc 0x80 0x00 # CHECK: jalr.hb $4
+0x09 0x24 0xa0 0x00 # CHECK: jalr.hb $4, $5
+0x08 0x00 0xe0 0x00 # CHECK: jr $7
+0xc6 0x23 0xa4 0x80 # CHECK: lb $4, 9158($5)
+0x06 0x00 0xa4 0x90 # CHECK: lbu $4, 6($5)
+0xc6 0x23 0xe9 0xd4 # CHECK: ldc1 $f9, 9158($7)
+0x01 0x02 0xf7 0x4d # CHECK: ldxc1 $f8, $23($15)
+0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
+0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
+0xc6 0x23 0xe9 0xc0 # CHECK: ll $9, 9158($7)
+0x67 0x45 0x06 0x3c # CHECK: lui $6, 17767
+0x05 0x00 0xa6 0x4c # CHECK: luxc1 $f0, $6($5)
+0x18 0x00 0xa4 0x8c # CHECK: lw $4, 24($5)
+0xc6 0x23 0xe9 0xc4 # CHECK: lwc1 $f9, 9158($7)
+0x03 0x00 0x82 0x88 # CHECK: lwl $2, 3($4)
+0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
+0x00 0x05 0xcc 0x4d # CHECK: lwxc1 $f20, $12($14)
+0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
+0xa1 0xd4 0x94 0x4e # CHECK: madd.d $f18, $f20, $f26, $f20
+0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
+0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
+0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
+0x10 0x28 0x00 0x00 # CHECK: mfhi $5
+0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
+0x12 0x28 0x00 0x00 # CHECK: mflo $5
+0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
+0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
+0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
+0xa9 0xf2 0x52 0x4c # CHECK: msub.d $f10, $f2, $f30, $f18
+0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
+0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
+0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
+0x11 0x00 0xe0 0x00 # CHECK: mthi $7
+0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
+0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
+0x02 0x62 0x2e 0x46 # CHECK: mul.d $f8, $f12, $f14
+0x42 0x32 0x07 0x46 # CHECK: mul.s $f9, $f6, $f7
+0x02 0x48 0xc7 0x70 # CHECK: mul $9, $6, $7
+0x18 0x00 0x65 0x00 # CHECK: mult $3, $5
+0x19 0x00 0x65 0x00 # CHECK: multu $3, $5
+0x07 0x73 0x20 0x46 # CHECK: neg.d $f12, $f14
+0x87 0x39 0x00 0x46 # CHECK: neg.s $f6, $f7
+0xb1 0x74 0x54 0x4d # CHECK: nmadd.d $f18, $f10, $f14, $f20
+0x30 0xc8 0xac 0x4c # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x27 0x48 0xc7 0x00 # CHECK: nor $9, $6, $7
+0xb9 0x87 0x1e 0x4d # CHECK: nmsub.d $f30, $f8, $f16, $f30
+0x78 0x98 0x04 0x4f # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x25 0x18 0x65 0x00 # CHECK: or $3, $3, $5
+0x67 0x45 0xc9 0x34 # CHECK: ori $9, $6, 17767
+0xc2 0x49 0x26 0x00 # CHECK: rotr $9, $6, 7
+0x46 0x48 0xe6 0x00 # CHECK: rotrv $9, $6, $7
+0x0c 0x73 0x20 0x46 # CHECK: round.w.d $f12, $f14
+0x8c 0x39 0x00 0x46 # CHECK: round.w.s $f6, $f7
+0xc6 0x23 0xa4 0xa0 # CHECK: sb $4, 9158($5)
+0x06 0x00 0xa4 0xa0 # CHECK: sb $4, 6($5)
+0xc6 0x23 0xe9 0xe0 # CHECK: sc $9, 9158($7)
+0xc6 0x23 0xe9 0xf4 # CHECK: sdc1 $f9, 9158($7)
+0x09 0x40 0x24 0x4f # CHECK: sdxc1 $f8, $4($25)
+0x20 0x34 0x07 0x7c # CHECK: seb $6, $7
+0x20 0x36 0x07 0x7c # CHECK: seh $6, $7
+0xc6 0x23 0xa4 0xa4 # CHECK: sh $4, 9158($5)
+0xc0 0x21 0x03 0x00 # CHECK: sll $4, $3, 7
+0x04 0x10 0xa3 0x00 # CHECK: sllv $2, $3, $5
+0x2a 0x18 0x65 0x00 # CHECK: slt $3, $3, $5
+0x67 0x00 0x63 0x28 # CHECK: slti $3, $3, 103
+0x67 0x00 0x63 0x2c # CHECK: sltiu $3, $3, 103
+0x2b 0x18 0x65 0x00 # CHECK: sltu $3, $3, $5
+0x04 0x73 0x20 0x46 # CHECK: sqrt.d $f12, $f14
+0x84 0x39 0x00 0x46 # CHECK: sqrt.s $f6, $f7
+0xc3 0x21 0x03 0x00 # CHECK: sra $4, $3, 7
+0x07 0x10 0xa3 0x00 # CHECK: srav $2, $3, $5
+0xc2 0x21 0x03 0x00 # CHECK: srl $4, $3, 7
+0x06 0x10 0xa3 0x00 # CHECK: srlv $2, $3, $5
+0x01 0x62 0x2e 0x46 # CHECK: sub.d $f8, $f12, $f14
+0x41 0x32 0x07 0x46 # CHECK: sub.s $f9, $f6, $f7
+0x22 0x48 0xc7 0x00 # CHECK: sub $9, $6, $7
+0x23 0x20 0x65 0x00 # CHECK: subu $4, $3, $5
+0x0d 0x20 0xb8 0x4c # CHECK: suxc1 $f4, $24($5)
+0x18 0x00 0xa4 0xac # CHECK: sw $4, 24($5)
+0xc6 0x23 0xe9 0xe4 # CHECK: swc1 $f9, 9158($7)
+0x10 0x00 0xa4 0xa8 # CHECK: swl $4, 16($5)
+0x10 0x00 0xe6 0xb8 # CHECK: swr $6, 16($7)
+0x08 0xd0 0xd2 0x4e # CHECK: swxc1 $f26, $18($22)
+0xcf 0x01 0x00 0x00 # CHECK: sync 7
+0x0d 0x73 0x20 0x46 # CHECK: trunc.w.d $f12, $f14
+0x8d 0x39 0x00 0x46 # CHECK: trunc.w.s $f6, $f7
+0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
+0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
+0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-le.txt b/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-le.txt
deleted file mode 100644
index 62977dc..0000000
--- a/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5-le.txt
+++ /dev/null
@@ -1,169 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r5 | FileCheck %s
-0x05 0x73 0x20 0x46 # CHECK: abs.d $f12, $f14
-0x85 0x39 0x00 0x46 # CHECK: abs.s $f6, $f7
-0x20 0x48 0xc7 0x00 # CHECK: add $9, $6, $7
-0x00 0x62 0x2e 0x46 # CHECK: add.d $f8, $f12, $f14
-0x40 0x32 0x07 0x46 # CHECK: add.s $f9, $f6, $f7
-0x67 0x45 0xc9 0x20 # CHECK: addi $9, $6, 17767
-0x67 0xc5 0xc9 0x24 # CHECK: addiu $9, $6, -15001
-0x21 0x48 0xc7 0x00 # CHECK: addu $9, $6, $7
-0x24 0x48 0xc7 0x00 # CHECK: and $9, $6, $7
-0x67 0x45 0xc9 0x30 # CHECK: andi $9, $6, 17767
-0x4c 0x01 0x00 0x10 # CHECK: b 1332
-0x4c 0x01 0x00 0x45 # CHECK: bc1f 1332
-0x4c 0x01 0x1c 0x45 # CHECK: bc1f $fcc7, 1332
-0x4c 0x01 0x01 0x45 # CHECK: bc1t 1332
-0x4c 0x01 0x1d 0x45 # CHECK: bc1t $fcc7, 1332
-0x4c 0x01 0x26 0x11 # CHECK: beq $9, $6, 1332
-0x4c 0x01 0xc1 0x04 # CHECK: bgez $6, 1332
-0x4c 0x01 0xd1 0x04 # CHECK: bgezal $6, 1332
-0x4c 0x01 0xc0 0x1c # CHECK: bgtz $6, 1332
-0x4c 0x01 0xc0 0x18 # CHECK: blez $6, 1332
-0x4c 0x01 0x26 0x15 # CHECK: bne $9, $6, 1332
-0x32 0x60 0x2e 0x46 # CHECK: c.eq.d $f12, $f14
-0x32 0x30 0x07 0x46 # CHECK: c.eq.s $f6, $f7
-0x30 0x60 0x2e 0x46 # CHECK: c.f.d $f12, $f14
-0x30 0x30 0x07 0x46 # CHECK: c.f.s $f6, $f7
-0x3e 0x60 0x2e 0x46 # CHECK: c.le.d $f12, $f14
-0x3e 0x30 0x07 0x46 # CHECK: c.le.s $f6, $f7
-0x3c 0x60 0x2e 0x46 # CHECK: c.lt.d $f12, $f14
-0x3c 0x30 0x07 0x46 # CHECK: c.lt.s $f6, $f7
-0x3d 0x60 0x2e 0x46 # CHECK: c.nge.d $f12, $f14
-0x3d 0x30 0x07 0x46 # CHECK: c.nge.s $f6, $f7
-0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.d $f12, $f14
-0x3b 0x30 0x07 0x46 # CHECK: c.ngl.s $f6, $f7
-0x39 0x60 0x2e 0x46 # CHECK: c.ngle.d $f12, $f14
-0x39 0x30 0x07 0x46 # CHECK: c.ngle.s $f6, $f7
-0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.d $f12, $f14
-0x3f 0x30 0x07 0x46 # CHECK: c.ngt.s $f6, $f7
-0x36 0x60 0x2e 0x46 # CHECK: c.ole.d $f12, $f14
-0x36 0x30 0x07 0x46 # CHECK: c.ole.s $f6, $f7
-0x34 0x60 0x2e 0x46 # CHECK: c.olt.d $f12, $f14
-0x34 0x30 0x07 0x46 # CHECK: c.olt.s $f6, $f7
-0x3a 0x60 0x2e 0x46 # CHECK: c.seq.d $f12, $f14
-0x3a 0x30 0x07 0x46 # CHECK: c.seq.s $f6, $f7
-0x38 0x60 0x2e 0x46 # CHECK: c.sf.d $f12, $f14
-0x38 0x30 0x07 0x46 # CHECK: c.sf.s $f6, $f7
-0x33 0x60 0x2e 0x46 # CHECK: c.ueq.d $f12, $f14
-0x33 0xe0 0x12 0x46 # CHECK: c.ueq.s $f28, $f18
-0x37 0x60 0x2e 0x46 # CHECK: c.ule.d $f12, $f14
-0x37 0x30 0x07 0x46 # CHECK: c.ule.s $f6, $f7
-0x35 0x60 0x2e 0x46 # CHECK: c.ult.d $f12, $f14
-0x35 0x30 0x07 0x46 # CHECK: c.ult.s $f6, $f7
-0x31 0x60 0x2e 0x46 # CHECK: c.un.d $f12, $f14
-0x31 0x30 0x07 0x46 # CHECK: c.un.s $f6, $f7
-0x0e 0x73 0x20 0x46 # CHECK: ceil.w.d $f12, $f14
-0x8e 0x39 0x00 0x46 # CHECK: ceil.w.s $f6, $f7
-0x00 0x38 0x46 0x44 # CHECK: cfc1 $6, $7
-0x21 0x30 0xe6 0x70 # CHECK: clo $6, $7
-0x20 0x30 0xe6 0x70 # CHECK: clz $6, $7
-0x00 0x38 0xc6 0x44 # CHECK: ctc1 $6, $7
-0xa1 0x39 0x00 0x46 # CHECK: cvt.d.s $f6, $f7
-0x21 0x73 0x80 0x46 # CHECK: cvt.d.w $f12, $f14
-0x25 0x73 0x20 0x46 # CHECK: cvt.l.d $f12, $f14
-0xa5 0x39 0x00 0x46 # CHECK: cvt.l.s $f6, $f7
-0x20 0x73 0x20 0x46 # CHECK: cvt.s.d $f12, $f14
-0xa0 0x39 0x80 0x46 # CHECK: cvt.s.w $f6, $f7
-0x24 0x73 0x20 0x46 # CHECK: cvt.w.d $f12, $f14
-0xa4 0x39 0x00 0x46 # CHECK: cvt.w.s $f6, $f7
-0x00 0x60 0x7e 0x41 # CHECK: di $fp
-0x00 0x60 0x60 0x41 # CHECK: di
-0x20 0x60 0x6e 0x41 # CHECK: ei $14
-0x20 0x60 0x60 0x41 # CHECK: ei
-0x0f 0x73 0x20 0x46 # CHECK: floor.w.d $f12, $f14
-0x8f 0x39 0x00 0x46 # CHECK: floor.w.s $f6, $f7
-0x84 0x61 0x33 0x7d # CHECK: ins $19, $9, 6, 7
-0x4c 0x01 0x00 0x08 # CHECK: j 1328
-0x4c 0x01 0x00 0x0c # CHECK: jal 1328
-0x4c 0x01 0x00 0x74 # CHECK: jalx 1328
-0x09 0xf8 0xe0 0x00 # CHECK: jalr $7
-0x09 0xfc 0x80 0x00 # CHECK: jalr.hb $4
-0x09 0x24 0xa0 0x00 # CHECK: jalr.hb $4, $5
-0x08 0x00 0xe0 0x00 # CHECK: jr $7
-0xc6 0x23 0xa4 0x80 # CHECK: lb $4, 9158($5)
-0x06 0x00 0xa4 0x90 # CHECK: lbu $4, 6($5)
-0xc6 0x23 0xe9 0xd4 # CHECK: ldc1 $f9, 9158($7)
-0x01 0x02 0xf7 0x4d # CHECK: ldxc1 $f8, $23($15)
-0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
-0x0c 0x00 0xa4 0x84 # CHECK: lh $4, 12($5)
-0xc6 0x23 0xe9 0xc0 # CHECK: ll $9, 9158($7)
-0x67 0x45 0x06 0x3c # CHECK: lui $6, 17767
-0x05 0x00 0xa6 0x4c # CHECK: luxc1 $f0, $6($5)
-0x18 0x00 0xa4 0x8c # CHECK: lw $4, 24($5)
-0xc6 0x23 0xe9 0xc4 # CHECK: lwc1 $f9, 9158($7)
-0x03 0x00 0x82 0x88 # CHECK: lwl $2, 3($4)
-0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
-0x00 0x05 0xcc 0x4d # CHECK: lwxc1 $f20, $12($14)
-0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
-0xa1 0xd4 0x94 0x4e # CHECK: madd.d $f18, $f20, $f26, $f20
-0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
-0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
-0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
-0x10 0x28 0x00 0x00 # CHECK: mfhi $5
-0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
-0x12 0x28 0x00 0x00 # CHECK: mflo $5
-0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
-0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
-0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
-0xa9 0xf2 0x52 0x4c # CHECK: msub.d $f10, $f2, $f30, $f18
-0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
-0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
-0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
-0x11 0x00 0xe0 0x00 # CHECK: mthi $7
-0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
-0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
-0x02 0x62 0x2e 0x46 # CHECK: mul.d $f8, $f12, $f14
-0x42 0x32 0x07 0x46 # CHECK: mul.s $f9, $f6, $f7
-0x02 0x48 0xc7 0x70 # CHECK: mul $9, $6, $7
-0x18 0x00 0x65 0x00 # CHECK: mult $3, $5
-0x19 0x00 0x65 0x00 # CHECK: multu $3, $5
-0x07 0x73 0x20 0x46 # CHECK: neg.d $f12, $f14
-0x87 0x39 0x00 0x46 # CHECK: neg.s $f6, $f7
-0xb1 0x74 0x54 0x4d # CHECK: nmadd.d $f18, $f10, $f14, $f20
-0x30 0xc8 0xac 0x4c # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x27 0x48 0xc7 0x00 # CHECK: nor $9, $6, $7
-0xb9 0x87 0x1e 0x4d # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x78 0x98 0x04 0x4f # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x25 0x18 0x65 0x00 # CHECK: or $3, $3, $5
-0x67 0x45 0xc9 0x34 # CHECK: ori $9, $6, 17767
-0xc2 0x49 0x26 0x00 # CHECK: rotr $9, $6, 7
-0x46 0x48 0xe6 0x00 # CHECK: rotrv $9, $6, $7
-0x0c 0x73 0x20 0x46 # CHECK: round.w.d $f12, $f14
-0x8c 0x39 0x00 0x46 # CHECK: round.w.s $f6, $f7
-0xc6 0x23 0xa4 0xa0 # CHECK: sb $4, 9158($5)
-0x06 0x00 0xa4 0xa0 # CHECK: sb $4, 6($5)
-0xc6 0x23 0xe9 0xe0 # CHECK: sc $9, 9158($7)
-0xc6 0x23 0xe9 0xf4 # CHECK: sdc1 $f9, 9158($7)
-0x09 0x40 0x24 0x4f # CHECK: sdxc1 $f8, $4($25)
-0x20 0x34 0x07 0x7c # CHECK: seb $6, $7
-0x20 0x36 0x07 0x7c # CHECK: seh $6, $7
-0xc6 0x23 0xa4 0xa4 # CHECK: sh $4, 9158($5)
-0xc0 0x21 0x03 0x00 # CHECK: sll $4, $3, 7
-0x04 0x10 0xa3 0x00 # CHECK: sllv $2, $3, $5
-0x2a 0x18 0x65 0x00 # CHECK: slt $3, $3, $5
-0x67 0x00 0x63 0x28 # CHECK: slti $3, $3, 103
-0x67 0x00 0x63 0x2c # CHECK: sltiu $3, $3, 103
-0x2b 0x18 0x65 0x00 # CHECK: sltu $3, $3, $5
-0x04 0x73 0x20 0x46 # CHECK: sqrt.d $f12, $f14
-0x84 0x39 0x00 0x46 # CHECK: sqrt.s $f6, $f7
-0xc3 0x21 0x03 0x00 # CHECK: sra $4, $3, 7
-0x07 0x10 0xa3 0x00 # CHECK: srav $2, $3, $5
-0xc2 0x21 0x03 0x00 # CHECK: srl $4, $3, 7
-0x06 0x10 0xa3 0x00 # CHECK: srlv $2, $3, $5
-0x01 0x62 0x2e 0x46 # CHECK: sub.d $f8, $f12, $f14
-0x41 0x32 0x07 0x46 # CHECK: sub.s $f9, $f6, $f7
-0x22 0x48 0xc7 0x00 # CHECK: sub $9, $6, $7
-0x23 0x20 0x65 0x00 # CHECK: subu $4, $3, $5
-0x0d 0x20 0xb8 0x4c # CHECK: suxc1 $f4, $24($5)
-0x18 0x00 0xa4 0xac # CHECK: sw $4, 24($5)
-0xc6 0x23 0xe9 0xe4 # CHECK: swc1 $f9, 9158($7)
-0x10 0x00 0xa4 0xa8 # CHECK: swl $4, 16($5)
-0x10 0x00 0xe6 0xb8 # CHECK: swr $6, 16($7)
-0x08 0xd0 0xd2 0x4e # CHECK: swxc1 $f26, $18($22)
-0xcf 0x01 0x00 0x00 # CHECK: sync 7
-0x0d 0x73 0x20 0x46 # CHECK: trunc.w.d $f12, $f14
-0x8d 0x39 0x00 0x46 # CHECK: trunc.w.s $f6, $f7
-0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
-0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
-0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt b/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt
index 39c4644..282f3a2 100644
--- a/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt
+++ b/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt
@@ -1,169 +1,171 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r5 | FileCheck %s
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
 0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
-0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
-0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x41 0x60 0x60 0x00 # CHECK: di
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
-0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
-0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
-0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
-0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
 0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
-0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
-0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
+0x4d 0x54 0x74 0xb1 # CHECK: nmadd.d $f18, $f10, $f14, $f20
 0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
-0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
+0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
 0x4e 0x94 0xd4 0xa1 # CHECK: madd.d $f18, $f20, $f26, $f20
+0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
+0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
 0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
 0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
 0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
-0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
 0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x4d 0x54 0x74 0xb1 # CHECK: nmadd.d $f18, $f10, $f14, $f20
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x4d 0x1e 0x87 0xb9 # CHECK: nmsub.d $f30, $f8, $f16, $f30
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
-0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
-0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
-0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
+0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
 0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
 0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
+0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
 0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
diff --git a/test/MC/Disassembler/Mips/mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6.txt
deleted file mode 100644
index afef8ad..0000000
--- a/test/MC/Disassembler/Mips/mips32r6.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 | FileCheck %s
-
-0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
-0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
-0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
-0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
-0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
-0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
-0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
-
-# FIXME: Don't check the immediate on these for the moment, the encode/decode
-#        functions are not inverses of eachother.
-#        The immediate should be 4 but the disassembler currently emits 8
-0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0,
-0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31,
-0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0,
-0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31,
-# FIXME: Don't check the immediate on these for the moment, the encode/decode
-#        functions are not inverses of eachother.
-#        The immediate should be 8 but the disassembler currently emits 12
-0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0,
-0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31,
-0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0,
-0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31,
-
-0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
-# FIXME: Don't check the immediate on the bcczal's for the moment, the
-#        encode/decode functions are not inverses of eachother.
-0x20 0x02 0x01 0x4d # CHECK: beqzalc $2,
-0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
-0x60 0x02 0x01 0x4d # CHECK: bnezalc $2,
-0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
-0x58 0x43 0x00 0x40 # CHECK: bgec $2, $3, 256
-0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
-0x18 0x42 0x01 0x4d # CHECK: bgezalc $2,
-0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
-0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
-0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256
-0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2,
-0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
-0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2,
-0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
-0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
-0x18 0x02 0x01 0x4d # CHECK: blezalc $2,
-0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
-0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256
-0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
-0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
-0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
-0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
-0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
-0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
-0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
-0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
-0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
-0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
-0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
-0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
-0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
-0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
-# 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256
-# 0xd8 0x05 0x01 0x00 # CHECK-TODO: jic $5, 256
-0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268
-0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268
-0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
-0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
-0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
-0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
-0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
-0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
-0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
-0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
-0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
-0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
-0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
-0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
-0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
-0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
-0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
-0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
-0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
-0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
-0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
-0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
-0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
-0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
-0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
-0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x7e 0x42 0xb3 0xb6 # CHECK: ll $2, -153($18)
-0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
-0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
-0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
diff --git a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt
index c10d166..94dc3a2 100644
--- a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt
+++ b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt
@@ -83,6 +83,7 @@
 0xc5 0x10 0x64 0x00 # CHECK: lsa $2, $3, $4, 3
 0x43 0x00 0x48 0xec # CHECK: lwpc $2, 268
 0x43 0x00 0x50 0xec # CHECK: lwupc $2, 268
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
 0xda 0x10 0x64 0x00 # CHECK: mod $2, $3, $4
 0xdb 0x10 0x64 0x00 # CHECK: modu $2, $3, $4
 0x98 0x10 0x64 0x00 # CHECK: mul $2, $3, $4
@@ -93,6 +94,7 @@
 0x98 0x18 0x24 0x46 # CHECK: maddf.d $f2, $f3, $f4
 0x99 0x18 0x04 0x46 # CHECK: msubf.s $f2, $f3, $f4
 0x99 0x18 0x24 0x46 # CHECK: msubf.d $f2, $f3, $f4
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
 0x10 0x08 0x22 0x46 # CHECK: sel.d $f0, $f1, $f2
 0x10 0x08 0x02 0x46 # CHECK: sel.s $f0, $f1, $f2
 0x35 0x10 0x64 0x00 # CHECK: seleqz $2, $3, $4
diff --git a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt
index 0b78003..e1721b9 100644
--- a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt
+++ b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt
@@ -1,148 +1,173 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 | FileCheck %s
-0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
-0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
-0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
-0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
-0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
-0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
-0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
+0x00 0x00 0x00 0x0e # CHECK: sdbbp
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x4f # CHECK: sync 1
+0x00 0x00 0x08 0x8e # CHECK: sdbbp 34
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
+0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
+0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
+0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
+0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
+0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
+0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 3
+0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
+0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
+0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
+0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
+0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
 0x04 0x11 0x14 0x9b # CHECK: bal 21104
-0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
-0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
-0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0, 8
-0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31, 8
-0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0, 8
-0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31, 8
-0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0, 12
-0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31, 12
-0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0, 12
-0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31, 12
-0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
-0x20 0x02 0x01 0x4d # CHECK: beqzalc $2, 1332
-0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
-0x60 0x02 0x01 0x4d # CHECK: bnezalc $2, 1332
-0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
-0x58 0x43 0x00 0x40 # CHECK: bgec $2, $3, 256
-0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
+# FIXME: The encode/decode functions are not inverses of each other.
+0x18 0x02 0x01 0x4d # CHECK: blezalc $2, 1332
+# FIXME: The encode/decode functions are not inverses of each other.
 0x18 0x42 0x01 0x4d # CHECK: bgezalc $2, 1332
-0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
-0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
-0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256
+0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
+# FIXME: The encode/decode functions are not inverses of each other.
 0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2, 1332
-0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
+# FIXME: The encode/decode functions are not inverses of each other.
 0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2, 1332
-0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
-0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
-0x18 0x02 0x01 0x4d # CHECK: blezalc $2, 1332
-0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
 0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256
-0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
-0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
-0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
 0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
+# FIXME: The encode/decode functions are not inverses of each other.
+0x20 0x02 0x01 0x4d # CHECK: beqzalc $2, 1332
 0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
 0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
+0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x03 # CHECK: mfc0 $8, $16, 3
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
+0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0, 8
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
+0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31, 8
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
+0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0, 8
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
+0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31, 8
+0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
+0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
+0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
+0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
+0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
+0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
+0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
+0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
+0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
+0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
+0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
+0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
+0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
+0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
 0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
 0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
 0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
 0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
 0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
 0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
 0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
 0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
 0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
 0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
 0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
 0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x41 0x60 0x60 0x00 # CHECK: di
-0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
-0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 3
-0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268
-0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268
-0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
-0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
-0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
-0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
-0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
-0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
-0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
-0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
-0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
-0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
-0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
-0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
-0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
-0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
-0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
-0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
-0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
-0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
-0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
-0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
-0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
-0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
-0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
-0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
-0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x7e 0x42 0xb3 0xb6 # CHECK: ll $2, -153($18)
-0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
-0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
-0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
-0x00 0x00 0x00 0x40 # CHECK: ssnop
-0x00 0x00 0x00 0x0e # CHECK: sdbbp
-0x00 0x00 0x08 0x8e # CHECK: sdbbp 34
-0x00 0x00 0x00 0x0f # CHECK: sync
-0x00 0x00 0x00 0x4f # CHECK: sync 1
-0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
-0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
-0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
-0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
-0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
-0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
-0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
-0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
-0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
-0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
-0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
-0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
-0x49 0xc8 0x0d 0x43 # CHECK: ldc2 $8, -701($1)
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
+0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0, 12
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
+0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31, 12
 0x49 0x52 0x34 0xb7 # CHECK: lwc2 $18, -841($6)
-0x49 0xf4 0x92 0x75 # CHECK: sdc2 $20, 629($18)
 0x49 0x79 0x81 0x30 # CHECK: swc2 $25, 304($16)
-0xf8 0x05 0x01 0x00 # CHECK: jialc $5, 256
-0xd8 0x05 0x01 0x00 # CHECK: jic $5, 256
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
+0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0, 12
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
+0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31, 12
+0x49 0xc8 0x0d 0x43 # CHECK: ldc2 $8, -701($1)
+0x49 0xf4 0x92 0x75 # CHECK: sdc2 $20, 629($18)
+0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
+0x58 0x43 0x00 0x40 # CHECK: bgec $2, $3, 256
+0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256
+0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
+0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
+0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
+0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
+# FIXME: The encode/decode functions are not inverses of each other.
+0x60 0x02 0x01 0x4d # CHECK: bnezalc $2, 1332
+0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
+0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
+0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
+0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
+0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
 0x7c 0xa1 0x04 0x25 # CHECK: cache 1, 8($5)
 0x7c 0xa1 0x04 0x35 # CHECK: pref 1, 8($5)
+0x7e 0x42 0xb3 0xb6 # CHECK: ll $2, -153($18)
+0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
+0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
+0xd8 0x05 0x01 0x00 # CHECK: jic $5, 256
+0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
+0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
+0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268
+0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268
+0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
+0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
+0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
+0xf8 0x05 0x01 0x00 # CHECK: jialc $5, 256
+0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
diff --git a/test/MC/Disassembler/Mips/mips4/valid-mips4.txt b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
index 2f7cbe9..207f408 100644
--- a/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
+++ b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
@@ -1,231 +1,229 @@
 # RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
 # CHECK: .text
-0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
-0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0xc0 # CHECK: ehb
+0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
+0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
+0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
+0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
+0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
+0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
+0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
+0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
+0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
+0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
+0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x1c 0xe0 0x01 # CHECK: movf $gp, $8, $fcc7
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x30 0x18 0x0b # CHECK: movn $3, $17, $16
 0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0x95 0x00 0x01 # CHECK: movt $zero, $20, $fcc5
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xc9 0x28 0x0a # CHECK: movz $5, $22, $9
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
 0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
 0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
-0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
-0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
-0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
-0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
-0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
 0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
-0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
 0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
+0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
+0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
 0x45 0x00 0x00 0x01 # CHECK: bc1f 8
-0x45 0x04 0x00 0x00 # CHECK: bc1f $fcc1, 4
-0x45 0x1e 0x00 0x06 # CHECK: bc1fl $fcc7, 28
-0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
 0x45 0x01 0x00 0x01 # CHECK: bc1t 8
-0x45 0x05 0x00 0x00 # CHECK: bc1t $fcc1, 4
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
 0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
+0x45 0x04 0x00 0x00 # CHECK: bc1f $fcc1, 4
+0x45 0x05 0x00 0x00 # CHECK: bc1t $fcc1, 4
+0x45 0x1e 0x00 0x06 # CHECK: bc1fl $fcc7, 28
 0x45 0x1f 0x00 0x06 # CHECK: bc1tl $fcc7, 28
-0x04 0x11 0x14 0x9b # CHECK: bal 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
-0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
-0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
-0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
-0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
-0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
-0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
-0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
-0xbc 0xa1 0x00 0x08 # CHECK: cache 1, 8($5)
-0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
-0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
-0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
-0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
-0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
+0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
 0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
-0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
 0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
-0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
-0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
 0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
-0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
-0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
-0x46 0x20 0x7e 0x25 # CHECK: cvt.l.d $f24, $f15
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
 0x46 0x00 0xea 0xe5 # CHECK: cvt.l.s $f11, $f29
-0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
+0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
+0x46 0x03 0x3e 0x52 # CHECK: movz.s $f25, $f7, $3
+0x46 0x05 0x17 0x91 # CHECK: movt.s $f30, $f2, $fcc1
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x17 0x03 0x13 # CHECK: movn.s $f12, $f0, $23
+0x46 0x18 0x2d 0xd1 # CHECK: movf.s $f23, $f5, $fcc6
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
+0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
+0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
 0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
-0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
 0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
-0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
-0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x7e 0x25 # CHECK: cvt.l.d $f24, $f15
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
+0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
+0x46 0x21 0x10 0x11 # CHECK: movt.d $f0, $f2, $fcc0
+0x46 0x29 0xeb 0x12 # CHECK: movz.d $f12, $f29, $9
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x34 0x59 0x91 # CHECK: movf.d $f6, $f11, $fcc5
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xae 0xd3 # CHECK: movn.d $f27, $f21, $26
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
+0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
+0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
+0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
-0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
-0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
-0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x64 0x58 0x46 0x9f # CHECK: daddiu $24, $2, 18079
 0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
-0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
-0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
-0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
-0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
-0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
-0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
-0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
-0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
-0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
-0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
-0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
-0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
-0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
-0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
-0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
-0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
-0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
-0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
-0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
-0x00 0x00 0x00 0xc0 # CHECK: ehb
-0x42 0x00 0x00 0x18 # CHECK: eret
-0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
-0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
-0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
-0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
 0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
-0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
-0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
-0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
 0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
 0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
-0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
-0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
+0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0xa1 0x00 0x08 # CHECK: cache 1, 8($5)
 0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
-0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
 0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
 0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
-0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
-0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
-0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
-0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
-0x00 0x00 0x98 0x10 # CHECK: mfhi $19
-0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
-0x00 0x00 0x88 0x12 # CHECK: mflo $17
-0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
-0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
-0x01 0x1c 0xe0 0x01 # CHECK: movf $gp, $8, $fcc7
-0x46 0x34 0x59 0x91 # CHECK: movf.d $f6, $f11, $fcc5
-0x46 0x18 0x2d 0xd1 # CHECK: movf.s $f23, $f5, $fcc6
-0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
-0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
-0x02 0x30 0x18 0x0b # CHECK: movn $3, $17, $16
-0x46 0x3a 0xae 0xd3 # CHECK: movn.d $f27, $f21, $26
-0x46 0x17 0x03 0x13 # CHECK: movn.s $f12, $f0, $23
-0x02 0x95 0x00 0x01 # CHECK: movt $zero, $20, $fcc5
-0x46 0x21 0x10 0x11 # CHECK: movt.d $f0, $f2, $fcc0
-0x46 0x05 0x17 0x91 # CHECK: movt.s $f30, $f2, $fcc1
-0x02 0xc9 0x28 0x0a # CHECK: movz $5, $22, $9
-0x46 0x29 0xeb 0x12 # CHECK: movz.d $f12, $f29, $9
-0x46 0x03 0x3e 0x52 # CHECK: movz.s $f25, $f7, $3
-0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
-0x02 0x20 0x00 0x11 # CHECK: mthi $17
-0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
-0x03 0x20 0x00 0x13 # CHECK: mtlo $25
-0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
-0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
-0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
-0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
-0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
-0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
-0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
-0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
-0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
-0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
-0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
-0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
 0xcc 0xa1 0x00 0x08 # CHECK: pref 1, 8($5) 
-0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
-0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
-0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
-0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
-0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
 0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
 0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
-0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
-0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
-0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
 0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
 0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
-0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
-0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
-0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
-0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
-0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
-0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
-0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
-0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
-0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
-0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
-0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
-0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
-0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
-0x00 0x00 0x00 0x40 # CHECK: ssnop
-0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
-0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
-0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
-0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
-0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
-0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
-0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
-0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
-0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
-0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
-0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
-0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
-0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
-0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
-0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
-0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
-0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
-0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
-0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
-0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
-0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
-0x42 0x00 0x00 0x08 # CHECK: tlbp
-0x42 0x00 0x00 0x01 # CHECK: tlbr
-0x42 0x00 0x00 0x02 # CHECK: tlbwi
-0x42 0x00 0x00 0x06 # CHECK: tlbwr
-0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
-0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
-0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
-0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
-0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
-0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
-0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
-0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
-0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
-0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
-0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
-0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
-0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
-0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
+0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
deleted file mode 100644
index d494df6..0000000
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
-# CHECK: daddiu $11, $26, 31949
-0x67 0x4b 0x7c 0xcd
-
-# CHECK: daddiu $sp, $sp, -32
-0x67 0xbd 0xff 0xe0
-
-# CHECK: daddu $26, $1, $11
-0x00 0x2b 0xd0 0x2d
-
-# CHECK: ddiv $zero, $26, $22
-0x03 0x56 0x00 0x1e
-
-# CHECK: ddivu $zero, $9, $24
-0x01 0x38 0x00 0x1f
-
-# CHECK: dmfc1 $2, $f14
-0x44 0x22 0x70 0x00
-
-# CHECK: dmtc1 $23, $f5
-0x44 0xb7 0x28 0x00
-
-# CHECK: dmult $11, $26
-0x01 0x7a 0x00 0x1c
-
-# CHECK: dmultu $23, $13
-0x02 0xed 0x00 0x1d
-
-# CHECK: dsll $3, $24, 17
-0x00 0x18 0x1c 0x78
-
-# CHECK: dsllv $gp, $27, $24
-0x03 0x1b 0xe0 0x14
-
-# CHECK: dsra $1, $1, 30
-0x00 0x01 0x0f 0xbb
-
-# CHECK: dsrav $1, $1, $fp
-0x03 0xc1 0x08 0x17
-
-# CHECK: dsrl $10, $gp, 24
-0x00 0x1c 0x56 0x3a
-
-# CHECK: dsrlv $gp, $10, $23
-0x02 0xea 0xe0 0x16
-
-# CHECK: dsubu $gp, $27, $24
-0x03 0x78 0xe0 0x2f
-
-# CHECK: lw $27, -15155($1)
-0x8c 0x3b 0xc4 0xcd
-
-# CHECK: lui $1, 1
-0x3c 0x01 0x00 0x01
-
-# CHECK: lwu $3, -1746($3)
-0x9c 0x63 0xf9 0x2e
-
-# CHECK: lui $ra, 1
-0x3c 0x1f 0x00 0x01
-
-# CHECK: sw $26, -15159($1)
-0xac 0x3a 0xc4 0xc9
-
-# CHECK: ld $26, 3958($zero)
-0xdc 0x1a 0x0f 0x76
-
-# CHECK: sd $6, 17767($zero)
-0xfc 0x06 0x45 0x67
-
-# CHECK: luxc1 $f0, $6($5)
-0x4c 0xa6 0x00 0x05
-
-# CHECK: lwxc1 $f20, $12($14)
-0x4d 0xcc 0x05 0x00
-
-# CHECK: suxc1 $f4, $24($5)
-0x4c 0xb8 0x20 0x0d
-
-# CHECK: swxc1 $f26, $18($22)
-0x4e 0xd2 0xd0 0x08
-
-# CHECK: ldxc1 $f2, $2($10)
-0x4d 0x42 0x00 0x81
-
-# CHECK: sdxc1 $f8, $4($25)
-0x4f 0x24 0x40 0x09
-
-# CHECK: sdc2  $9, 9158($7)
-0xf8 0xe9 0x23 0xc6
-
-# CHECK: ldc2  $3, 9162($8)
-0xd9 0x03 0x23 0xca
diff --git a/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt b/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt
index 698ebfb..2d52216 100644
--- a/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt
+++ b/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt
@@ -82,7 +82,9 @@
 0x24 0x80 0x30 0x73 # CHECK: dclz $16, $25
 0x1e 0x00 0x53 0x03 # CHECK: ddiv $zero, $26, $19
 0x1f 0x00 0x11 0x02 # CHECK: ddivu $zero, $16, $17
+0x00 0x50 0x38 0x40 # CHECK: dmfc0 $24, $10, 0
 0x00 0x68 0x2c 0x44 # CHECK: dmfc1 $12, $f13
+0x00 0x50 0xa4 0x40 # CHECK: dmtc0 $4, $10, 0
 0x00 0x70 0xb0 0x44 # CHECK: dmtc1 $16, $f14
 0x1c 0x00 0xe9 0x02 # CHECK: dmult $23, $9
 0x1d 0x00 0xa6 0x00 # CHECK: dmultu $5, $6
@@ -142,6 +144,7 @@
 0x10 0x00 0xa3 0x98 # CHECK: lwr $3, 16($5)
 0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
 0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
 0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
 0x10 0x28 0x00 0x00 # CHECK: mfhi $5
 0x12 0x28 0x00 0x00 # CHECK: mflo $5
@@ -149,6 +152,7 @@
 0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
 0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
 0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
 0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
 0x11 0x00 0xe0 0x00 # CHECK: mthi $7
 0x13 0x00 0xe0 0x00 # CHECK: mtlo $7
diff --git a/test/MC/Disassembler/Mips/mips64/valid-mips64.txt b/test/MC/Disassembler/Mips/mips64/valid-mips64.txt
index 953e31f..6cbf5d3 100644
--- a/test/MC/Disassembler/Mips/mips64/valid-mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64/valid-mips64.txt
@@ -1,218 +1,254 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x01 0x0f 0xbb # CHECK: dsra $1, $1, 30
+0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
+0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
+0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
+0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
+0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
+0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
+0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x18 0x1c 0x78 # CHECK: dsll $3, $24, 17
+0x00 0x1c 0x56 0x3a # CHECK: dsrl $10, $gp, 24
+0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
+0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x2b 0xd0 0x2d # CHECK: daddu $26, $1, $11
+0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x01 0x38 0x00 0x1f # CHECK: ddivu $zero, $9, $24
+0x01 0x7a 0x00 0x1c # CHECK: dmult $11, $26
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x02 0xea 0xe0 0x16 # CHECK: dsrlv $gp, $10, $23
+0x02 0xed 0x00 0x1d # CHECK: dmultu $23, $13
+0x03 0x1b 0xe0 0x14 # CHECK: dsllv $gp, $27, $24
+0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x03 0x56 0x00 0x1e # CHECK: ddiv $zero, $26, $22
+0x03 0x78 0xe0 0x2f # CHECK: dsubu $gp, $27, $24
+0x03 0xc1 0x08 0x17 # CHECK: dsrav $1, $1, $fp
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x01 0x00 0x01 # CHECK: lui $1, 1
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x3c 0x1f 0x00 0x01 # CHECK: lui $ra, 1
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
+0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x22 0x70 0x00 # CHECK: dmfc1 $2, $f14
+0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xb7 0x28 0x00 # CHECK: dmtc1 $23, $f5
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
-0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
 0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
-0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
+0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
+0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
+0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0x42 0x00 0x81 # CHECK: ldxc1 $f2, $2($10)
+0x4d 0xbb 0x60 0x0d # CHECK: suxc1 $f12, $27($13)
+0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
+0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
+0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
+0x4e 0xb6 0x04 0xc5 # CHECK: luxc1 $f19, $22($21)
+0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
+0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
+0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
+0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
-0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
+0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
+0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
-0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
-0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
 0x64 0x58 0x46 0x9f # CHECK: daddiu $24, $2, 18079
-0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
-0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
-0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
-0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
-0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
-0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
-0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
-0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
-0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
-0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
-0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
-0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
-0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
-0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
-0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
-0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
-0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
-0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
-0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
-0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
 0x65 0x6f 0xec 0x5f # CHECK: daddiu $15, $11, -5025
 0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
-0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
-0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
-0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
-0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
-0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
+0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
+0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
+0x67 0x4b 0x7c 0xcd # CHECK: daddiu $11, $26, 31949
+0x67 0xbd 0xff 0xe0 # CHECK: daddiu $sp, $sp, -32
 0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
 0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
-0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
-0x4e 0xb6 0x04 0xc5 # CHECK: luxc1 $f19, $22($21)
-0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
-0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
-0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
-0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
 0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
 0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
 0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
 0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
-0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
-0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
-0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
-0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
-0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
-0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
-0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
-0x4d 0xbb 0x60 0x0d # CHECK: suxc1 $f12, $27($13)
-0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
-0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
-0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
-0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
+0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
+0x74 0x00 0x01 0x4c # CHECK: jalx 1328
 0x7c 0x05 0xe8 0x3b # CHECK: .set push
                     # CHECK: .set mips32r2
                     # CHECK: rdhwr $5, $29
                     # CHECK: .set pop
+0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0x3b 0xc4 0xcd # CHECK: lw $27, -15155($1)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9c 0x63 0xf9 0x2e # CHECK: lwu $3, -1746($3)
+0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
+0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0x3a 0xc4 0xc9 # CHECK: sw $26, -15159($1)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
+0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
 0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
 0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd9 0x03 0x23 0xca # CHECK: ldc2  $3, 9162($8)
+0xdc 0x1a 0x0f 0x76 # CHECK: ld $26, 3958($zero)
+0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
-0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf8 0xe9 0x23 0xc6 # CHECK: sdc2  $9, 9158($7)
+0xfc 0x06 0x45 0x67 # CHECK: sd $6, 17767($zero)
+0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2.txt
deleted file mode 100644
index cee6f3c..0000000
--- a/test/MC/Disassembler/Mips/mips64r2.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: daddiu $11, $26, 31949
-0x67 0x4b 0x7c 0xcd
-
-# CHECK: daddu $26, $1, $11
-0x00 0x2b 0xd0 0x2d
-
-# CHECK: ddiv $zero, $26, $22
-0x03 0x56 0x00 0x1e
-
-# CHECK: ddivu $zero, $9, $24
-0x01 0x38 0x00 0x1f
-
-# CHECK: dmfc1 $2, $f14
-0x44 0x22 0x70 0x00
-
-# CHECK: dmtc1 $23, $f5
-0x44 0xb7 0x28 0x00
-
-# CHECK: dmult $11, $26
-0x01 0x7a 0x00 0x1c
-
-# CHECK: dmultu $23, $13
-0x02 0xed 0x00 0x1d
-
-# CHECK: dsll $3, $24, 17
-0x00 0x18 0x1c 0x78
-
-# CHECK: dsllv $gp, $27, $24
-0x03 0x1b 0xe0 0x14
-
-# CHECK: dsra $1, $1, 30
-0x00 0x01 0x0f 0xbb
-
-# CHECK: dsrav $1, $1, $fp
-0x03 0xc1 0x08 0x17
-
-# CHECK: dsrl $10, $gp, 24
-0x00 0x1c 0x56 0x3a
-
-# CHECK: dsrlv $gp, $10, $23
-0x02 0xea 0xe0 0x16
-
-# CHECK: dsubu $gp, $27, $24
-0x03 0x78 0xe0 0x2f
-
-# CHECK: lw $27, -15155($1)
-0x8c 0x3b 0xc4 0xcd
-
-# CHECK: lui $1, 1
-0x3c 0x01 0x00 0x01
-
-# CHECK: lwu $3, -1746($3)
-0x9c 0x63 0xf9 0x2e
-
-# CHECK: lui $ra, 1
-0x3c 0x1f 0x00 0x01
-
-# CHECK: sw $26, -15159($1)
-0xac 0x3a 0xc4 0xc9
-
-# CHECK: ld $26, 3958($zero)
-0xdc 0x1a 0x0f 0x76
-
-# CHECK: sd $6, 17767($zero)
-0xfc 0x06 0x45 0x67
-
-# CHECK: dclo $9, $24
-0x73 0x09 0x48 0x25
-
-# CHECK: dclz $26, $9
-0x71 0x3a 0xd0 0x24
-
-# CHECK: dext $7, $gp, 29, 31
-0x7f 0x87 0xf7 0x43
-
-# CHECK: dins $20, $gp, 15, 1
-0x7f 0x94 0x7b 0xc7
-
-# CHECK: dsbh $7, $gp
-0x7c 0x1c 0x38 0xa4
-
-# CHECK: dshd $3, $14
-0x7c 0x0e 0x19 0x64
-
-# CHECK: drotr $20, $27, 6
-0x00 0x3b 0xa1 0xba
-
-# CHECK: drotrv $24, $23, $5
-0x00 0xb7 0xc0 0x56
diff --git a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt
index 6509456..2c6859f 100644
--- a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt
@@ -88,7 +88,9 @@
 0x24 0x80 0x30 0x73 # CHECK: dclz $16, $25
 0x1e 0x00 0x53 0x03 # CHECK: ddiv $zero, $26, $19
 0x1f 0x00 0x11 0x02 # CHECK: ddivu $zero, $16, $17
+0x00 0x50 0x38 0x40 # CHECK: dmfc0 $24, $10, 0
 0x00 0x68 0x2c 0x44 # CHECK: dmfc1 $12, $f13
+0x00 0x50 0xa4 0x40 # CHECK: dmtc0 $4, $10, 0
 0x00 0x70 0xb0 0x44 # CHECK: dmtc1 $16, $f14
 0x1c 0x00 0xe9 0x02 # CHECK: dmult $23, $9
 0x1d 0x00 0xa6 0x00 # CHECK: dmultu $5, $6
@@ -161,6 +163,7 @@
 0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
 0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
 0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
 0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
 0x10 0x28 0x00 0x00 # CHECK: mfhi $5
 0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
@@ -170,6 +173,7 @@
 0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
 0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
 0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
 0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
 0x11 0x00 0xe0 0x00 # CHECK: mthi $7
 0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
diff --git a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt
index 79fcc76..0c6e10e 100644
--- a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt
+++ b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt
@@ -3,237 +3,271 @@
 # an effect on the disassembler behaviour.
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips64r2 | FileCheck %s
 # CHECK: .text
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x01 0x0f 0xbb # CHECK: dsra $1, $1, 30
+0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
+0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
+0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
+0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
+0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
+0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
+0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x18 0x1c 0x78 # CHECK: dsll $3, $24, 17
+0x00 0x1c 0x56 0x3a # CHECK: dsrl $10, $gp, 24
+0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
+0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
+0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
+0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x2b 0xd0 0x2d # CHECK: daddu $26, $1, $11
+0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
+0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
+0x00 0x3b 0xa1 0xba # CHECK: drotr $20, $27, 6
+0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xb7 0xc0 0x56 # CHECK: drotrv $24, $23, $5
+0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x01 0x38 0x00 0x1f # CHECK: ddivu $zero, $9, $24
+0x01 0x7a 0x00 0x1c # CHECK: dmult $11, $26
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
+0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x02 0xea 0xe0 0x16 # CHECK: dsrlv $gp, $10, $23
+0x02 0xed 0x00 0x1d # CHECK: dmultu $23, $13
+0x03 0x1b 0xe0 0x14 # CHECK: dsllv $gp, $27, $24
+0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x03 0x56 0x00 0x1e # CHECK: ddiv $zero, $26, $22
+0x03 0x78 0xe0 0x2f # CHECK: dsubu $gp, $27, $24
+0x03 0xc1 0x08 0x17 # CHECK: dsrav $1, $1, $fp
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x01 0x00 0x01 # CHECK: lui $1, 1
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x3c 0x1f 0x00 0x01 # CHECK: lui $ra, 1
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x22 0x70 0x00 # CHECK: dmfc1 $2, $f14
+0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xb7 0x28 0x00 # CHECK: dmtc1 $23, $f5
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
 0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
-0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
 0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
-0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
-0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
-0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
+0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
+0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
+0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
+0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
+0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
+0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
-0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
+0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
+0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
-0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
-0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
 0x64 0x58 0x46 0x9f # CHECK: daddiu $24, $2, 18079
+0x65 0x6f 0xec 0x5f # CHECK: daddiu $15, $11, -5025
+0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
 0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
+0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
+0x67 0x4b 0x7c 0xcd # CHECK: daddiu $11, $26, 31949
+0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
+0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
+0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
+0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
+0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
+0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
 0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
+0x71 0x3a 0xd0 0x24 # CHECK: dclz $26, $9
+0x73 0x09 0x48 0x25 # CHECK: dclo $9, $24
 0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
-0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
-0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
-0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
-0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
-0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
-0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
-0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
-0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
-0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
-0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
-0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
-0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
-0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
-0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
+0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
+0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
 0x7c 0x0e 0x18 0xa4 # CHECK: dsbh $3, $14
+0x7c 0x0e 0x19 0x64 # CHECK: dshd $3, $14
+0x7c 0x1c 0x38 0xa4 # CHECK: dsbh $7, $gp
 0x7c 0x1d 0x11 0x64 # CHECK: dshd $2, $sp
-0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
-0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
-0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
-0x65 0x6f 0xec 0x5f # CHECK: daddiu $15, $11, -5025
-0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x41 0x60 0x60 0x00 # CHECK: di
-0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
-0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
-0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
-0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
-0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
-0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
 0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
-0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x7f 0x87 0xf7 0x43 # CHECK: dext $7, $gp, 29, 31
+0x7f 0x94 0x7b 0xc7 # CHECK: dins $20, $gp, 15, 1
 0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
-0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
-0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
-0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
-0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
 0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0x3b 0xc4 0xcd # CHECK: lw $27, -15155($1)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
 0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
-0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
+0x9c 0x63 0xf9 0x2e # CHECK: lwu $3, -1746($3)
 0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
-0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
-0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
-0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
-0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
-0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
-0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
-0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
-0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
-0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
-0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
-0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
-0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
-0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
-0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
-0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0x3a 0xc4 0xc9 # CHECK: sw $26, -15159($1)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
 0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
-0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xdc 0x1a 0x0f 0x76 # CHECK: ld $26, 3958($zero)
+0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xfc 0x06 0x45 0x67 # CHECK: sd $6, 17767($zero)
+0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt
index 52374af..88e9c26 100644
--- a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt
@@ -85,7 +85,9 @@
 0x24 0x80 0x30 0x73 # CHECK: dclz $16, $25
 0x1e 0x00 0x53 0x03 # CHECK: ddiv $zero, $26, $19
 0x1f 0x00 0x11 0x02 # CHECK: ddivu $zero, $16, $17
+0x00 0x50 0x38 0x40 # CHECK: dmfc0 $24, $10, 0
 0x00 0x68 0x2c 0x44 # CHECK: dmfc1 $12, $f13
+0x00 0x50 0xa4 0x40 # CHECK: dmtc0 $4, $10, 0
 0x00 0x70 0xb0 0x44 # CHECK: dmtc1 $16, $f14
 0x1c 0x00 0xe9 0x02 # CHECK: dmult $23, $9
 0x1d 0x00 0xa6 0x00 # CHECK: dmultu $5, $6
@@ -158,6 +160,7 @@
 0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
 0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
 0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
 0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
 0x10 0x28 0x00 0x00 # CHECK: mfhi $5
 0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
@@ -167,6 +170,7 @@
 0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
 0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
 0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
 0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
 0x11 0x00 0xe0 0x00 # CHECK: mthi $7
 0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
diff --git a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt
index acd59fc..82405f3 100644
--- a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt
+++ b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt
@@ -1,236 +1,240 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r3 | FileCheck %s
 # CHECK: .text
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
+0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
+0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
+0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
+0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
+0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
+0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
+0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
+0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
+0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
+0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
+0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
+0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
 0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
-0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
 0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
-0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
-0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
-0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
+0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
+0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
+0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
+0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
+0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
+0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
-0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
+0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
+0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
-0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
-0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
 0x64 0x58 0x46 0x9f # CHECK: daddiu $24, $2, 18079
+0x65 0x6f 0xec 0x5f # CHECK: daddiu $15, $11, -5025
+0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
 0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
+0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
+0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
+0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
+0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
+0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
+0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
+0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
 0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
 0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
-0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
-0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
-0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
-0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
-0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
-0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
-0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
-0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
-0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
-0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
-0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
-0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
-0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
-0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
+0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
+0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
 0x7c 0x0e 0x18 0xa4 # CHECK: dsbh $3, $14
 0x7c 0x1d 0x11 0x64 # CHECK: dshd $2, $sp
-0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
-0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
-0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
-0x65 0x6f 0xec 0x5f # CHECK: daddiu $15, $11, -5025
-0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x41 0x60 0x60 0x00 # CHECK: di
-0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
-0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
-0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
-0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
-0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
-0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
 0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
-0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
 0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
-0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
-0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
-0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
-0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
 0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
 0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
-0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
 0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
-0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
-0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
-0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
-0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
-0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
-0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
-0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
-0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
-0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
-0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
-0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
-0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
-0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
-0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
-0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
 0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
-0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt
index 3d97a2b..bd709d22 100644
--- a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt
@@ -85,7 +85,9 @@
 0x24 0x80 0x30 0x73 # CHECK: dclz $16, $25
 0x1e 0x00 0x53 0x03 # CHECK: ddiv $zero, $26, $19
 0x1f 0x00 0x11 0x02 # CHECK: ddivu $zero, $16, $17
+0x00 0x50 0x38 0x40 # CHECK: dmfc0 $24, $10, 0
 0x00 0x68 0x2c 0x44 # CHECK: dmfc1 $12, $f13
+0x00 0x50 0xa4 0x40 # CHECK: dmtc0 $4, $10, 0
 0x00 0x70 0xb0 0x44 # CHECK: dmtc1 $16, $f14
 0x1c 0x00 0xe9 0x02 # CHECK: dmult $23, $9
 0x1d 0x00 0xa6 0x00 # CHECK: dmultu $5, $6
@@ -158,6 +160,7 @@
 0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
 0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
 0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
 0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
 0x10 0x28 0x00 0x00 # CHECK: mfhi $5
 0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
@@ -167,6 +170,7 @@
 0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
 0x28 0x53 0x70 0x4e # CHECK: msub.s $f12, $f19, $f10, $f16
 0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
 0x00 0x38 0x86 0x44 # CHECK: mtc1 $6, $f7
 0x11 0x00 0xe0 0x00 # CHECK: mthi $7
 0x00 0x80 0xe0 0x44 # CHECK: mthc1 $zero, $f16
diff --git a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt
index ce414ed..1b30144 100644
--- a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt
+++ b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt
@@ -1,236 +1,240 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r5 | FileCheck %s
 # CHECK: .text
-0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
-0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x10 # CHECK: mfhi $5
+0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
+0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
+0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
+0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
+0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
+0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
+0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
+0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
+0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
+0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
+0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
+0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
+0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
+0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
+0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
+0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
+0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
+0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
+0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
+0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
+0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
+0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
+0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
 0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
-0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
-0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
-0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
-0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
 0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
+0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
 0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
-0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xe0 0x00 0x08 # CHECK: jr $7
+0x00 0xe0 0x00 0x11 # CHECK: mthi $7
+0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
+0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
+0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
 0x10 0x00 0x01 0x4c # CHECK: b 1332
+0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
+0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
+0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
+0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
+0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
+0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
+0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
 0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
-0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
 0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
-0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
-0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
-0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
-0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
-0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
-0x15 0x26 0x01 0x4c # CHECK: bne $9, $6, 1332
-0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
-0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
-0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
+0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
+0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
+0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
+0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
+0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
+0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
+0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
+0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
+0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
+0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
+0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
 0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
-0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
-0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
-0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
+0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
+0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
+0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
+0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
+0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
+0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
+0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
+0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
 0x46 0x07 0x30 0x3c # CHECK: c.lt.s $f6, $f7
-0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
 0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
-0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
-0x46 0x07 0x30 0x3b # CHECK: c.ngl.s $f6, $f7
-0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
-0x46 0x07 0x30 0x39 # CHECK: c.ngle.s $f6, $f7
-0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
 0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
-0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
-0x46 0x07 0x30 0x36 # CHECK: c.ole.s $f6, $f7
-0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
-0x46 0x07 0x30 0x34 # CHECK: c.olt.s $f6, $f7
-0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
-0x46 0x07 0x30 0x3a # CHECK: c.seq.s $f6, $f7
-0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
-0x46 0x07 0x30 0x38 # CHECK: c.sf.s $f6, $f7
-0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
+0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
+0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
 0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
-0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
-0x46 0x07 0x30 0x37 # CHECK: c.ule.s $f6, $f7
-0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
-0x46 0x07 0x30 0x35 # CHECK: c.ult.s $f6, $f7
-0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
-0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
+0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
 0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
-0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
+0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
+0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
+0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
+0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
+0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
 0x46 0x20 0x73 0x0e # CHECK: ceil.w.d $f12, $f14
-0x46 0x00 0x39 0x8e # CHECK: ceil.w.s $f6, $f7
-0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
-0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
-0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
-0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
-0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
-0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
-0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
-0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
-0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
-0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
 0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
-0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
 0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
-0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
-0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
+0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
+0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
+0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
+0x46 0x2e 0x60 0x33 # CHECK: c.ueq.d $f12, $f14
+0x46 0x2e 0x60 0x34 # CHECK: c.olt.d $f12, $f14
+0x46 0x2e 0x60 0x35 # CHECK: c.ult.d $f12, $f14
+0x46 0x2e 0x60 0x36 # CHECK: c.ole.d $f12, $f14
+0x46 0x2e 0x60 0x37 # CHECK: c.ule.d $f12, $f14
+0x46 0x2e 0x60 0x38 # CHECK: c.sf.d $f12, $f14
+0x46 0x2e 0x60 0x39 # CHECK: c.ngle.d $f12, $f14
+0x46 0x2e 0x60 0x3a # CHECK: c.seq.d $f12, $f14
+0x46 0x2e 0x60 0x3b # CHECK: c.ngl.d $f12, $f14
+0x46 0x2e 0x60 0x3c # CHECK: c.lt.d $f12, $f14
+0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
+0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
+0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
+0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
+0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
+0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
+0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
+0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
+0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
+0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
+0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
+0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
-0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
 0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
+0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
+0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
 0x63 0xbd 0x93 0xc7 # CHECK: daddi $sp, $sp, -27705
-0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
-0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
 0x64 0x58 0x46 0x9f # CHECK: daddiu $24, $2, 18079
+0x65 0x6f 0xec 0x5f # CHECK: daddiu $15, $11, -5025
+0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
 0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
+0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
+0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
+0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
+0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
+0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
+0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
+0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
 0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
+0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
+0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
 0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
-0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
-0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
-0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
-0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
-0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
-0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
-0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
-0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
-0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
-0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
-0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
-0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
-0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
-0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
-0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
-0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
-0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
-0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
-0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
-0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
+0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
+0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
 0x7c 0x0e 0x18 0xa4 # CHECK: dsbh $3, $14
 0x7c 0x1d 0x11 0x64 # CHECK: dshd $2, $sp
-0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
-0x63 0xbd 0x6c 0x39 # CHECK: daddi $sp, $sp, 27705
-0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
-0x65 0x6f 0xec 0x5f # CHECK: daddiu $15, $11, -5025
-0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x41 0x60 0x60 0x00 # CHECK: di
-0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
-0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
-0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
-0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
-0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
-0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
-0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
-0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
 0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
-0x08 0x00 0x01 0x4c # CHECK: j 1328
-0x0c 0x00 0x01 0x4c # CHECK: jal 1328
-0x74 0x00 0x01 0x4c # CHECK: jalx 1328
-0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x00 0xe0 0x00 0x08 # CHECK: jr $7
 0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
-0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
-0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
-0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
-0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
-0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
-0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
 0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
-0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
-0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
-0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
-0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
-0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
-0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
 0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
 0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
-0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
 0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
-0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
-0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
-0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
-0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
-0x00 0x00 0x28 0x10 # CHECK: mfhi $5
-0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
-0x00 0x00 0x28 0x12 # CHECK: mflo $5
-0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
-0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
-0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
-0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
-0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
-0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
-0x00 0xe0 0x00 0x11 # CHECK: mthi $7
-0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
-0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
-0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
-0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
-0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
-0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
-0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
-0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
-0x46 0x00 0x39 0x87 # CHECK: neg.s $f6, $f7
-0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
-0x00 0x00 0x00 0x00 # CHECK: nop
-0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
-0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
-0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
-0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
-0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
-0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
-0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
-0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
-0x46 0x20 0x73 0x0c # CHECK: round.w.d $f12, $f14
-0x46 0x00 0x39 0x8c # CHECK: round.w.s $f6, $f7
-0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
-0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
-0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
-0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
-0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
-0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
-0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
-0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
-0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
-0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
 0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
-0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
-0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
-0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
-0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
-0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
-0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
-0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
-0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
-0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
-0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
-0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
-0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
-0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
-0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
-0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
-0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
-0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
 0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
 0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
-0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
-0x00 0x00 0x01 0xcf # CHECK: sync 7
-0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
-0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
-0x46 0x20 0x73 0x0d # CHECK: trunc.w.d $f12, $f14
-0x46 0x00 0x39 0x8d # CHECK: trunc.w.s $f6, $f7
-0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
-0x00 0x65 0x18 0x26 # CHECK: xor $3, $3, $5
-0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
+0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
+0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
+0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6.txt
deleted file mode 100644
index 3ddef9a..0000000
--- a/test/MC/Disassembler/Mips/mips64r6.txt
+++ /dev/null
@@ -1,145 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips64r6 | FileCheck %s
-
-0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
-0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
-0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
-0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
-0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
-0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
-0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
-
-# FIXME: Don't check the immediate on these for the moment, the encode/decode
-#        functions are not inverses of eachother.
-#        The immediate should be 4 but the disassembler currently emits 8
-0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0,
-0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31,
-0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0,
-0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31,
-# FIXME: Don't check the immediate on these for the moment, the encode/decode
-#        functions are not inverses of eachother.
-#        The immediate should be 8 but the disassembler currently emits 12
-0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0,
-0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31,
-0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0,
-0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31,
-
-0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
-# FIXME: Don't check the immediate on the bcczal's for the moment, the
-#        encode/decode functions are not inverses of eachother.
-0x20 0x02 0x01 0x4d # CHECK: beqzalc $2,
-0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
-0x60 0x02 0x01 0x4d # CHECK: bnezalc $2,
-0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
-0x58 0x43 0x00 0x40 # CHECK: bgec $2, $3, 256
-0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
-0x18 0x42 0x01 0x4d # CHECK: bgezalc $2,
-0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
-0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
-0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256
-0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2,
-0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
-0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2,
-0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
-0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
-0x18 0x02 0x01 0x4d # CHECK: blezalc $2,
-0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
-0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256
-0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
-0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
-0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
-0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
-0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
-0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
-0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
-0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
-0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
-0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
-0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
-0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
-0x7c 0x43 0x23 0x64 # CHECK: dalign $4, $2, $3, 5
-0x74 0x62 0x12 0x34 # CHECK: daui $3, $2, 4660
-0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136
-0x04 0x7e 0xab 0xcd # CHECK: dati $3, -21555
-0x7c 0x02 0x20 0x24 # CHECK: dbitswap $4, $2
-0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
-0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
-# 0xf8 0x05 0x01 0x00 # CHECK-TODO: jialc $5, 256
-# 0xd8 0x05 0x01 0x00 # CHECK-TODO: jic $5, 256
-0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268
-0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268
-0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
-0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
-0x00 0x64 0x10 0x9e # CHECK: ddiv $2, $3, $4
-0x00 0x64 0x10 0x9f # CHECK: ddivu $2, $3, $4
-0x00 0x64 0x10 0xde # CHECK: dmod $2, $3, $4
-0x00 0x64 0x10 0xdf # CHECK: dmodu $2, $3, $4
-0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
-0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
-0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
-0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
-0x00 0x64 0x10 0x9c # CHECK: dmul $2, $3, $4
-0x00 0x64 0x10 0xdc # CHECK: dmuh $2, $3, $4
-0x00 0x64 0x10 0x9d # CHECK: dmulu $2, $3, $4
-0x00 0x64 0x10 0xdd # CHECK: dmuhu $2, $3, $4
-0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
-0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
-0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
-0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
-0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
-0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
-0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
-0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
-0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
-0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
-0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
-0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
-0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
-0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
-0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
-0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
-0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
-0xec 0x58 0x3c 0x48 # CHECK: ldpc $2, 123456
-0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0x7e 0x42 0xb3 0xb6 # CHECK: ll $2, -153($18)
-0x7f 0xe0 0x38 0x37 # CHECK: lld $zero, 112($ra)
-0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
-0x7f 0xaf 0xe6 0xa7 # CHECK: scd $15, -51($sp)
-0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
-0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
-0x00 0xc0 0x90 0x53 # CHECK: dclo $18, $6
-0x03 0x20 0x80 0x52 # CHECK: dclz $16, $25
diff --git a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt
index 4afd9cc..157e335 100644
--- a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt
@@ -92,8 +92,10 @@
 0x9a 0x10 0x64 0x00 # CHECK: div $2, $3, $4
 0x9b 0x10 0x64 0x00 # CHECK: divu $2, $3, $4
 0xd5 0x10 0x64 0x00 # CHECK: dlsa $2, $3, $4, 3
+0x00 0x50 0x38 0x40 # CHECK: dmfc0 $24, $10, 0
 0xde 0x10 0x64 0x00 # CHECK: dmod $2, $3, $4
 0xdf 0x10 0x64 0x00 # CHECK: dmodu $2, $3, $4
+0x00 0x50 0xa4 0x40 # CHECK: dmtc0 $4, $10, 0
 0xdc 0x10 0x64 0x00 # CHECK: dmuh $2, $3, $4
 0xdd 0x10 0x64 0x00 # CHECK: dmuhu $2, $3, $4
 0x9c 0x10 0x64 0x00 # CHECK: dmul $2, $3, $4
@@ -119,12 +121,14 @@
 0x1d 0x10 0x04 0x46 # CHECK: max.s $f0, $f2, $f4
 0x1f 0x10 0x24 0x46 # CHECK: maxa.d $f0, $f2, $f4
 0x1f 0x10 0x04 0x46 # CHECK: maxa.s $f0, $f2, $f4
+0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
 0x1c 0x10 0x24 0x46 # CHECK: min.d $f0, $f2, $f4
 0x1c 0x10 0x04 0x46 # CHECK: min.s $f0, $f2, $f4
 0x1e 0x10 0x24 0x46 # CHECK: mina.d $f0, $f2, $f4
 0x1e 0x10 0x04 0x46 # CHECK: mina.s $f0, $f2, $f4
 0xda 0x10 0x64 0x00 # CHECK: mod $2, $3, $4
 0xdb 0x10 0x64 0x00 # CHECK: modu $2, $3, $4
+0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
 0x99 0x18 0x24 0x46 # CHECK: msubf.d $f2, $f3, $f4
 0x99 0x18 0x04 0x46 # CHECK: msubf.s $f2, $f3, $f4
 0xd8 0x10 0x64 0x00 # CHECK: muh $2, $3, $4
diff --git a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt
index c41ba99..45379d9 100644
--- a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt
+++ b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt
@@ -1,168 +1,196 @@
 # RUN: llvm-mc %s -disassemble -triple=mips-unknown-linux -mcpu=mips64r6 | FileCheck %s
+0x00 0x00 0x00 0x0e # CHECK: sdbbp
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x4f # CHECK: sync 1
+0x00 0x00 0x08 0x8e # CHECK: sdbbp 34
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
+0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
+0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
+0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
+0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
+0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
+0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
+0x00 0x64 0x10 0x9c # CHECK: dmul $2, $3, $4
+0x00 0x64 0x10 0x9d # CHECK: dmulu $2, $3, $4
+0x00 0x64 0x10 0x9e # CHECK: ddiv $2, $3, $4
+0x00 0x64 0x10 0x9f # CHECK: ddivu $2, $3, $4
+0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 3
+0x00 0x64 0x10 0xd5 # CHECK: dlsa $2, $3, $4, 3
+0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
+0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
+0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
+0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
+0x00 0x64 0x10 0xdc # CHECK: dmuh $2, $3, $4
+0x00 0x64 0x10 0xdd # CHECK: dmuhu $2, $3, $4
+0x00 0x64 0x10 0xde # CHECK: dmod $2, $3, $4
+0x00 0x64 0x10 0xdf # CHECK: dmodu $2, $3, $4
+0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
+0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
+0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xc0 0x90 0x53 # CHECK: dclo $18, $6
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x03 0x20 0x80 0x52 # CHECK: dclz $16, $25
+0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136
+0x04 0x7e 0xab 0xcd # CHECK: dati $3, -21555
+# FIXME: The encode/decode functions are not inverses of each other.
+0x18 0x02 0x01 0x4d # CHECK: blezalc $2, 1332
+# FIXME: The encode/decode functions are not inverses of each other.
+0x18 0x42 0x01 0x4d # CHECK: bgezalc $2, 1332
+0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
+# FIXME: The encode/decode functions are not inverses of each other.
+0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2, 1332
+# FIXME: The encode/decode functions are not inverses of each other.
+0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2, 1332
+0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256
+0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
+# FIXME: The encode/decode functions are not inverses of each other.
+0x20 0x02 0x01 0x4d # CHECK: beqzalc $2, 1332
+0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
+0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
+0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
 0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
-0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
-0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
-0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
 0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
 0x3c 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
-0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
-0x04 0x11 0x14 0x9b # CHECK: bal 21104
-0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
-0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
+0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x03 # CHECK: mfc0 $8, $16, 3
+0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
+0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
+0x41 0x60 0x60 0x00 # CHECK: di
+0x41 0x60 0x60 0x20 # CHECK: ei
+0x41 0x6e 0x60 0x20 # CHECK: ei $14
+0x41 0x7e 0x60 0x00 # CHECK: di $fp
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
 0x45 0x20 0x00 0x01 # CHECK: bc1eqz $f0, 8
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
 0x45 0x3f 0x00 0x01 # CHECK: bc1eqz $f31, 8
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
 0x45 0xa0 0x00 0x01 # CHECK: bc1nez $f0, 8
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 4 but the disassembler currently emits 8
 0x45 0xbf 0x00 0x01 # CHECK: bc1nez $f31, 8
+0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
+0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
+0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
+0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
+0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
+0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
+0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
+0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
+0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
+0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
+0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
+0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
+0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
+0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
+0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
+0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
+0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4
+0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
+0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
+0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
+0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4
+0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
+0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4
+0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
+0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4
+0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4
+0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
+0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4
+0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4
+0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4
+0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4
+0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4
+0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
+0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
 0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0, 12
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
 0x49 0x3f 0x00 0x02 # CHECK: bc2eqz $31, 12
+0x49 0x52 0x34 0xb7 # CHECK: lwc2 $18, -841($6)
+0x49 0x79 0x81 0x30 # CHECK: swc2 $25, 304($16)
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
 0x49 0xa0 0x00 0x02 # CHECK: bc2nez $0, 12
+# FIXME: The encode/decode functions are not inverses of each other.
+#        The immediate should be 8 but the disassembler currently emits 12
 0x49 0xbf 0x00 0x02 # CHECK: bc2nez $31, 12
-0x20 0xa6 0x00 0x40 # CHECK: beqc $5, $6, 256
-0x20 0x02 0x01 0x4d # CHECK: beqzalc $2, 1332
-0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
+0x49 0xc8 0x0d 0x43 # CHECK: ldc2 $8, -701($1)
+0x49 0xf4 0x92 0x75 # CHECK: sdc2 $20, 629($18)
+0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
 0x58 0x43 0x00 0x40 # CHECK: bgec $2, $3, 256
-0x18 0x43 0x00 0x40 # CHECK: bgeuc $2, $3, 256
-0x18 0x42 0x01 0x4d # CHECK: bgezalc $2, 1332
 0x58 0xa5 0x00 0x40 # CHECK: bgezc $5, 256
-0x1c 0x02 0x01 0x4d # CHECK: bgtzalc $2, 1332
 0x5c 0x05 0x00 0x40 # CHECK: bgtzc $5, 256
-0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
-0x18 0x02 0x01 0x4d # CHECK: blezalc $2, 1332
-0x58 0x05 0x00 0x40 # CHECK: blezc $5, 256
-0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
-0x1c 0xa6 0x00 0x40 # CHECK: bltuc $5, $6, 256
-0x1c 0x42 0x01 0x4d # CHECK: bltzalc $2, 1332
 0x5c 0xa5 0x00 0x40 # CHECK: bltzc $5, 256
-0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
+0x5c 0xa6 0x00 0x40 # CHECK: bltc $5, $6, 256
+0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
+# FIXME: The encode/decode functions are not inverses of each other.
 0x60 0x02 0x01 0x4d # CHECK: bnezalc $2, 1332
-0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
 0x60 0x40 0x00 0x01 # CHECK: bnvc $2, $zero, 4
 0x60 0x82 0x00 0x01 # CHECK: bnvc $4, $2, 4
-0x60 0x00 0x00 0x01 # CHECK: bnvc $zero, $zero, 4
-0x20 0x40 0x00 0x01 # CHECK: bovc $2, $zero, 4
-0x20 0x82 0x00 0x01 # CHECK: bovc $4, $2, 4
-0x20 0x00 0x00 0x01 # CHECK: bovc $zero, $zero, 4
-0x7c 0xa1 0x04 0x25 # CHECK: cache 1, 8($5)
-0x46 0x20 0x20 0x9b # CHECK: class.d $f2, $f4
-0x46 0x00 0x20 0x9b # CHECK: class.s $f2, $f4
-0x00 0xa0 0x58 0x51 # CHECK: clo $11, $5
-0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
-0x46 0xa4 0x18 0x80 # CHECK: cmp.af.d $f2, $f3, $f4
-0x46 0x84 0x18 0x80 # CHECK: cmp.af.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x82 # CHECK: cmp.eq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x82 # CHECK: cmp.eq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x86 # CHECK: cmp.le.d $f2, $f3, $f4
-0x46 0x84 0x18 0x86 # CHECK: cmp.le.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x84 # CHECK: cmp.lt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x84 # CHECK: cmp.lt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x88 # CHECK: cmp.saf.d $f2, $f3, $f4
-0x46 0x84 0x18 0x88 # CHECK: cmp.saf.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8a # CHECK: cmp.seq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8a # CHECK: cmp.seq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8e # CHECK: cmp.sle.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8c # CHECK: cmp.slt.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8c # CHECK: cmp.slt.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8b # CHECK: cmp.sueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8b # CHECK: cmp.sueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8f # CHECK: cmp.sule.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x8d # CHECK: cmp.sult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x89 # CHECK: cmp.sun.d $f2, $f3, $f4
-0x46 0x84 0x18 0x89 # CHECK: cmp.sun.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x83 # CHECK: cmp.ueq.d $f2, $f3, $f4
-0x46 0x84 0x18 0x83 # CHECK: cmp.ueq.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x87 # CHECK: cmp.ule.d $f2, $f3, $f4
-0x46 0x84 0x18 0x87 # CHECK: cmp.ule.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x85 # CHECK: cmp.ult.d $f2, $f3, $f4
-0x46 0x84 0x18 0x85 # CHECK: cmp.ult.s $f2, $f3, $f4
-0x46 0xa4 0x18 0x81 # CHECK: cmp.un.d $f2, $f3, $f4
-0x46 0x84 0x18 0x81 # CHECK: cmp.un.s $f2, $f3, $f4
-0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136
-0x7c 0x43 0x23 0x64 # CHECK: dalign $4, $2, $3, 5
+0x60 0xa6 0x00 0x40 # CHECK: bnec $5, $6, 256
 0x74 0x62 0x12 0x34 # CHECK: daui $3, $2, 4660
+0x7c 0x02 0x20 0x20 # CHECK: bitswap $4, $2
 0x7c 0x02 0x20 0x24 # CHECK: dbitswap $4, $2
-0x00 0xc0 0x90 0x53 # CHECK: dclo $18, $6
-0x03 0x20 0x80 0x52 # CHECK: dclz $16, $25
-0x00 0x64 0x10 0x9e # CHECK: ddiv $2, $3, $4
-0x00 0x64 0x10 0x9f # CHECK: ddivu $2, $3, $4
-0x41 0x60 0x60 0x00 # CHECK: di
-0x41 0x7e 0x60 0x00 # CHECK: di $fp
-0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
-0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
-0x00 0x64 0x10 0xd5 # CHECK: dlsa $2, $3, $4, 3
-0x00 0x64 0x10 0xde # CHECK: dmod $2, $3, $4
-0x00 0x64 0x10 0xdf # CHECK: dmodu $2, $3, $4
-0x00 0x64 0x10 0xdc # CHECK: dmuh $2, $3, $4
-0x00 0x64 0x10 0xdd # CHECK: dmuhu $2, $3, $4
-0x00 0x64 0x10 0x9c # CHECK: dmul $2, $3, $4
-0x00 0x64 0x10 0x9d # CHECK: dmulu $2, $3, $4
-0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
-0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
-0x41 0x60 0x60 0x20 # CHECK: ei
-0x41 0x6e 0x60 0x20 # CHECK: ei $14
-0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
-0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
-0xf8 0x05 0x01 0x00 # CHECK: jialc $5, 256
-0xd8 0x05 0x01 0x00 # CHECK: jic $5, 256
-0x00 0x80 0x04 0x09 # CHECK: jr.hb $4
-0x49 0xc8 0x0d 0x43 # CHECK: ldc2 $8, -701($1)
-0xec 0x58 0x3c 0x48 # CHECK: ldpc $2, 123456
+0x7c 0x43 0x22 0xa0 # CHECK: align $4, $2, $3, 2
+0x7c 0x43 0x23 0x64 # CHECK: dalign $4, $2, $3, 5
+0x7c 0xa1 0x04 0x25 # CHECK: cache 1, 8($5)
+0x7c 0xa1 0x04 0x35 # CHECK: pref 1, 8($5)
 0x7e 0x42 0xb3 0xb6 # CHECK: ll $2, -153($18)
+0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
+0x7f 0xaf 0xe6 0xa7 # CHECK: scd $15, -51($sp)
 0x7f 0xe0 0x38 0x37 # CHECK: lld $zero, 112($ra)
-0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 3
-0x49 0x52 0x34 0xb7 # CHECK: lwc2 $18, -841($6)
+0xc8 0x37 0x96 0xb8 # CHECK: bc 14572256
+0xd8 0x05 0x01 0x00 # CHECK: jic $5, 256
+0xd8 0xa0 0x46 0x90 # CHECK: beqzc $5, 72256
+0xe8 0x37 0x96 0xb8 # CHECK: balc 14572256
 0xec 0x48 0x00 0x43 # CHECK: lwpc $2, 268
 0xec 0x50 0x00 0x43 # CHECK: lwupc $2, 268
-0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4
-0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4
-0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4
-0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4
-0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4
-0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
-0x00 0x64 0x10 0xdb # CHECK: modu $2, $3, $4
-0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4
-0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4
-0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
-0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
-0x00 0x64 0x10 0x98 # CHECK: mul $2, $3, $4
-0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
-0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
-0x7c 0xa1 0x04 0x35 # CHECK: pref 1, 8($5)
-0x46 0x20 0x20 0x9a # CHECK: rint.d $f2, $f4
-0x46 0x00 0x20 0x9a # CHECK: rint.s $f2, $f4
-0x7e 0x6f 0xec 0x26 # CHECK: sc $15, -40($19)
-0x7f 0xaf 0xe6 0xa7 # CHECK: scd $15, -51($sp)
-0x00 0x00 0x00 0x0e # CHECK: sdbbp
-0x00 0x00 0x08 0x8e # CHECK: sdbbp 34
-0x49 0xf4 0x92 0x75 # CHECK: sdc2 $20, 629($18)
-0x46 0x22 0x08 0x10 # CHECK: sel.d $f0, $f1, $f2
-0x46 0x02 0x08 0x10 # CHECK: sel.s $f0, $f1, $f2
-0x00 0x64 0x10 0x35 # CHECK: seleqz $2, $3, $4
-0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4
-0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4
-0x00 0x64 0x10 0x37 # CHECK: selnez $2, $3, $4
-0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4
-0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4
-0x00 0x00 0x00 0x40 # CHECK: ssnop
-0x49 0x79 0x81 0x30 # CHECK: swc2 $25, 304($16)
-0x00 0x00 0x00 0x0f # CHECK: sync
-0x00 0x00 0x00 0x4f # CHECK: sync 1
-0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
-0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
-0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
-0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
-0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
-0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
-0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
-0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
-0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
-0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
-0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
-0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0xec 0x58 0x3c 0x48 # CHECK: ldpc $2, 123456
+0xec 0x7e 0xff 0xff # CHECK: auipc $3, -1
+0xec 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
+0xec 0x80 0x00 0x19 # CHECK: addiupc $4, 100
+0xf8 0x05 0x01 0x00 # CHECK: jialc $5, 256
+0xf8 0xa0 0x46 0x90 # CHECK: bnezc $5, 72256
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
index 0e3a83f..16ff14c 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@@ -99,6 +99,12 @@
 # CHECK: vmrglw 2, 3, 4                  
 0x10 0x43 0x21 0x8c
 
+# CHECK: vmrgew 2, 3, 4
+0x10 0x43 0x27 0x8c
+
+# CHECK: vmrgow 2, 3, 4
+0x10 0x43 0x26 0x8c
+
 # CHECK: vspltb 2, 3, 1                  
 0x10 0x41 0x1a 0x0c
 
diff --git a/test/MC/Disassembler/X86/x86-16.txt b/test/MC/Disassembler/X86/x86-16.txt
index c6844cd..021cb23 100644
--- a/test/MC/Disassembler/X86/x86-16.txt
+++ b/test/MC/Disassembler/X86/x86-16.txt
@@ -786,3 +786,5 @@
 # CHECK: lretl
 0x66 0xcb
 
+# CHECK: callw	-1
+0xe8 0xff 0xff
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index 830b830..c51e0a3 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -54,6 +54,9 @@
 # CHECK: calll	-1234
 0xe8 0x2e 0xfb 0xff 0xff
 
+# CHECK: callw	-1
+0x66 0xe8 0xff 0xff
+
 # CHECK: lfence
 0x0f 0xae 0xe8
 
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 5699f40..065b2a5 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -339,3 +339,9 @@
 # CHECK: vaddps (%rdx,%xmm1), %zmm20, %zmm15
 # FIXME: vaddps (%rdx,%rcx), %zmm20, %zmm15
 0x62 0x71 0x5c 0x40 0x58 0x3c 0x0a
+
+# CHECK: callq 32767
+0xe8 0xff 0x7f 0x00 0x00
+
+# CHECK: callq -32769
+0xe8 0xff 0x7f 0xff 0xff
diff --git a/test/MC/ELF/discriminator.s b/test/MC/ELF/discriminator.s
index 8a695b9..75e4e86 100644
--- a/test/MC/ELF/discriminator.s
+++ b/test/MC/ELF/discriminator.s
@@ -19,12 +19,12 @@ foo:
         .long   .L.debug_abbrev_begin   # Offset Into Abbrev. Section
         .byte   8                       # Address Size (in bytes)
         .byte   1                       # Abbrev [1] 0xb:0x1b DW_TAG_compile_unit
-        .long   .Linfo_string0          # DW_AT_producer
+        .long   info_string0            # DW_AT_producer
         .short  12                      # DW_AT_language
-        .long   .Linfo_string1          # DW_AT_name
+        .long   info_string1            # DW_AT_name
         .quad   0                       # DW_AT_low_pc
         .long   0                       # DW_AT_stmt_list
-        .long   .Linfo_string2          # DW_AT_comp_dir
+        .long   info_string2            # DW_AT_comp_dir
                                         # DW_AT_APPLE_optimized
         .section        .debug_abbrev,"",@progbits
 .L.debug_abbrev_begin:
diff --git a/test/MC/ELF/many-sections-3.s b/test/MC/ELF/many-sections-3.s
new file mode 100644
index 0000000..02d30a6
--- /dev/null
+++ b/test/MC/ELF/many-sections-3.s
@@ -0,0 +1,107 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
+// RUN: llvm-readobj -t %t | FileCheck --check-prefix=SYMBOLS %s
+// RUN: llvm-nm %t | FileCheck --check-prefix=NM %s
+
+// Test that symbol a has a section that could be confused with common (0xFFF2)
+// SYMBOLS:         Name: a
+// SYMBOLS-NEXT:    Value: 0x0
+// SYMBOLS-NEXT:    Size: 0
+// SYMBOLS-NEXT:    Binding: Local (0x0)
+// SYMBOLS-NEXT:    Type: None (0x0)
+// SYMBOLS-NEXT:    Other: 0
+// SYMBOLS-NEXT:    Section: bar (0xFFF2)
+// SYMBOLS-NEXT:  }
+
+// Test that we don't get confused
+// NM: 0000000000000000 r a
+
+.macro gen_sections4 x
+        .section a\x
+        .section b\x
+        .section c\x
+        .section d\x
+.endm
+
+.macro gen_sections8 x
+        gen_sections4 a\x
+        gen_sections4 b\x
+.endm
+
+.macro gen_sections16 x
+        gen_sections8 a\x
+        gen_sections8 b\x
+.endm
+
+.macro gen_sections32 x
+        gen_sections16 a\x
+        gen_sections16 b\x
+.endm
+
+.macro gen_sections64 x
+        gen_sections32 a\x
+        gen_sections32 b\x
+.endm
+
+.macro gen_sections128 x
+        gen_sections64 a\x
+        gen_sections64 b\x
+.endm
+
+.macro gen_sections256 x
+        gen_sections128 a\x
+        gen_sections128 b\x
+.endm
+
+.macro gen_sections512 x
+        gen_sections256 a\x
+        gen_sections256 b\x
+.endm
+
+.macro gen_sections1024 x
+        gen_sections512 a\x
+        gen_sections512 b\x
+.endm
+
+.macro gen_sections2048 x
+        gen_sections1024 a\x
+        gen_sections1024 b\x
+.endm
+
+.macro gen_sections4096 x
+        gen_sections2048 a\x
+        gen_sections2048 b\x
+.endm
+
+.macro gen_sections8192 x
+        gen_sections4096 a\x
+        gen_sections4096 b\x
+.endm
+
+.macro gen_sections16384 x
+        gen_sections8192 a\x
+        gen_sections8192 b\x
+.endm
+
+.macro gen_sections32768 x
+        gen_sections16384 a\x
+        gen_sections16384 b\x
+.endm
+
+gen_sections32768 a
+gen_sections16384 b
+gen_sections8192 c
+gen_sections4096 d
+gen_sections2048 e
+gen_sections1024 f
+gen_sections512 g
+gen_sections256 h
+gen_sections128 i
+gen_sections64 j
+gen_sections32 k
+gen_sections8 l
+gen_sections4 m
+
+        .section foo
+        .section bar, "a"
+
+a:
diff --git a/test/MC/ELF/relax-arith.s b/test/MC/ELF/relax-arith.s
index b814556..d4f37a9 100644
--- a/test/MC/ELF/relax-arith.s
+++ b/test/MC/ELF/relax-arith.s
@@ -1,17 +1,18 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-objdump -d - | FileCheck  %s
 
 // Test that we correctly relax these instructions into versions that use
 // 16 or 32 bit immediate values.
 
 bar:
-// CHECK:      Name: imul
-// CHECK:      SectionData (
-// CHECK-NEXT:   0000: 6669DB00 0066691C 25000000 00000069
-// CHECK-NEXT:   0010: DB000000 00691C25 00000000 00000000
-// CHECK-NEXT:   0020: 4869DB00 00000048 691C2500 00000000
-// CHECK-NEXT:   0030: 000000
-// CHECK-NEXT: )
-        .section imul
+// CHECK:      Disassembly of section imul:
+// CHECK-NEXT: imul:
+// CHECK-NEXT:   0: 66 69 db 00 00                       imulw $0, %bx, %bx
+// CHECK-NEXT:   5: 66 69 1c 25 00 00 00 00 00 00        imulw $0, 0, %bx
+// CHECK-NEXT:   f: 69 db 00 00 00 00                    imull $0, %ebx, %ebx
+// CHECK-NEXT:  15: 69 1c 25 00 00 00 00 00 00 00 00     imull $0, 0, %ebx
+// CHECK-NEXT:  20: 48 69 db 00 00 00 00                 imulq $0, %rbx, %rbx
+// CHECK-NEXT:  27: 48 69 1c 25 00 00 00 00 00 00 00 00  imulq $0, 0, %rbx
+        .section imul,"x"
         imul $foo, %bx,  %bx
         imul $foo, bar,  %bx
         imul $foo, %ebx, %ebx
@@ -19,15 +20,15 @@ bar:
         imul $foo, %rbx, %rbx
         imul $foo, bar,  %rbx
 
-
-// CHECK:      Name: and
-// CHECK:      SectionData (
-// CHECK-NEXT:   0000: 6681E300 00668124 25000000 00000081
-// CHECK-NEXT:   0010: E3000000 00812425 00000000 00000000
-// CHECK-NEXT:   0020: 4881E300 00000048 81242500 00000000
-// CHECK-NEXT:   0030: 000000
-// CHECK-NEXT: )
-        .section and
+// CHECK:      Disassembly of section and:
+// CHECK-NEXT: and:
+// CHECK-NEXT:   0: 66 81 e3 00 00                       andw $0, %bx
+// CHECK-NEXT:   5: 66 81 24 25 00 00 00 00 00 00        andw $0, 0
+// CHECK-NEXT:   f: 81 e3 00 00 00 00                    andl $0, %ebx
+// CHECK-NEXT:  15: 81 24 25 00 00 00 00 00 00 00 00     andl $0, 0
+// CHECK-NEXT:  20: 48 81 e3 00 00 00 00                 andq $0, %rbx
+// CHECK-NEXT:  27: 48 81 24 25 00 00 00 00 00 00 00 00  andq $0, 0
+        .section and,"x"
         and  $foo, %bx
         andw $foo, bar
         and  $foo, %ebx
@@ -35,14 +36,15 @@ bar:
         and  $foo, %rbx
         andq $foo, bar
 
-// CHECK:      Name: or
-// CHECK:      SectionData (
-// CHECK-NEXT:   0000: 6681CB00 0066810C 25000000 00000081
-// CHECK-NEXT:   0010: CB000000 00810C25 00000000 00000000
-// CHECK-NEXT:   0020: 4881CB00 00000048 810C2500 00000000
-// CHECK-NEXT:   0030: 000000
-// CHECK-NEXT: )
-        .section or
+// CHECK:      Disassembly of section or:
+// CHECK-NEXT: or:
+// CHECK-NEXT:   0: 66 81 cb 00 00                       orw $0, %bx
+// CHECK-NEXT:   5: 66 81 0c 25 00 00 00 00 00 00        orw $0, 0
+// CHECK-NEXT:   f: 81 cb 00 00 00 00                    orl $0, %ebx
+// CHECK-NEXT:  15: 81 0c 25 00 00 00 00 00 00 00 00     orl $0, 0
+// CHECK-NEXT:  20: 48 81 cb 00 00 00 00                 orq $0, %rbx
+// CHECK-NEXT:  27: 48 81 0c 25 00 00 00 00 00 00 00 00  orq $0, 0
+        .section or,"x"
         or  $foo, %bx
         orw $foo, bar
         or  $foo, %ebx
@@ -50,14 +52,15 @@ bar:
         or  $foo, %rbx
         orq $foo, bar
 
-// CHECK:      Name: xor
-// CHECK:      SectionData (
-// CHECK-NEXT:   0000: 6681F300 00668134 25000000 00000081
-// CHECK-NEXT:   0010: F3000000 00813425 00000000 00000000
-// CHECK-NEXT:   0020: 4881F300 00000048 81342500 00000000
-// CHECK-NEXT:   0030: 000000
-// CHECK-NEXT: )
-        .section xor
+// CHECK:      Disassembly of section xor:
+// CHECK-NEXT: xor:
+// CHECK-NEXT:   0: 66 81 f3 00 00                       xorw $0, %bx
+// CHECK-NEXT:   5: 66 81 34 25 00 00 00 00 00 00        xorw $0, 0
+// CHECK-NEXT:   f: 81 f3 00 00 00 00                    xorl $0, %ebx
+// CHECK-NEXT:  15: 81 34 25 00 00 00 00 00 00 00 00     xorl $0, 0
+// CHECK-NEXT:  20: 48 81 f3 00 00 00 00                 xorq $0, %rbx
+// CHECK-NEXT:  27: 48 81 34 25 00 00 00 00 00 00 00 00  xorq $0, 0
+        .section xor,"x"
         xor  $foo, %bx
         xorw $foo, bar
         xor  $foo, %ebx
@@ -65,14 +68,15 @@ bar:
         xor  $foo, %rbx
         xorq $foo, bar
 
-// CHECK:      Name: add
-// CHECK:      SectionData (
-// CHECK-NEXT:   0000: 6681C300 00668104 25000000 00000081
-// CHECK-NEXT:   0010: C3000000 00810425 00000000 00000000
-// CHECK-NEXT:   0020: 4881C300 00000048 81042500 00000000
-// CHECK-NEXT:   0030: 000000
-// CHECK-NEXT: )
-        .section add
+// CHECK:      Disassembly of section add:
+// CHECK-NEXT: add:
+// CHECK-NEXT:   0: 66 81 c3 00 00                       addw $0, %bx
+// CHECK-NEXT:   5: 66 81 04 25 00 00 00 00 00 00        addw $0, 0
+// CHECK-NEXT:   f: 81 c3 00 00 00 00                    addl $0, %ebx
+// CHECK-NEXT:  15: 81 04 25 00 00 00 00 00 00 00 00     addl $0, 0
+// CHECK-NEXT:  20: 48 81 c3 00 00 00 00                 addq $0, %rbx
+// CHECK-NEXT:  27: 48 81 04 25 00 00 00 00 00 00 00 00  addq $0, 0
+        .section add,"x"
         add  $foo, %bx
         addw $foo, bar
         add  $foo, %ebx
@@ -80,14 +84,15 @@ bar:
         add  $foo, %rbx
         addq $foo, bar
 
-// CHECK:      Name: sub
-// CHECK:      SectionData (
-// CHECK-NEXT:   000: 6681EB00 0066812C 25000000 00000081
-// CHECK-NEXT:   010: EB000000 00812C25 00000000 00000000
-// CHECK-NEXT:   020: 4881EB00 00000048 812C2500 00000000
-// CHECK-NEXT:   030: 000000
-// CHECK-NEXT: )
-        .section sub
+// CHECK:      Disassembly of section sub:
+// CHECK-NEXT: sub:
+// CHECK-NEXT:   0: 66 81 eb 00 00                       subw $0, %bx
+// CHECK-NEXT:   5: 66 81 2c 25 00 00 00 00 00 00        subw $0, 0
+// CHECK-NEXT:   f: 81 eb 00 00 00 00                    subl $0, %ebx
+// CHECK-NEXT:  15: 81 2c 25 00 00 00 00 00 00 00 00     subl $0, 0
+// CHECK-NEXT:  20: 48 81 eb 00 00 00 00                 subq $0, %rbx
+// CHECK-NEXT:  27: 48 81 2c 25 00 00 00 00 00 00 00 00  subq $0, 0
+        .section sub,"x"
         sub  $foo, %bx
         subw $foo, bar
         sub  $foo, %ebx
@@ -95,14 +100,15 @@ bar:
         sub  $foo, %rbx
         subq $foo, bar
 
-// CHECK:      Name: cmp
-// CHECK:      SectionData (
-// CHECK-NEXT:   0000: 6681FB00 0066813C 25000000 00000081
-// CHECK-NEXT:   0010: FB000000 00813C25 00000000 00000000
-// CHECK-NEXT:   0020: 4881FB00 00000048 813C2500 00000000
-// CHECK-NEXT:   0030: 000000
-// CHECK-NEXT: )
-        .section cmp
+// CHECK:      Disassembly of section cmp:
+// CHECK-NEXT: cmp:
+// CHECK-NEXT:   0: 66 81 fb 00 00                       cmpw $0, %bx
+// CHECK-NEXT:   5: 66 81 3c 25 00 00 00 00 00 00        cmpw $0, 0
+// CHECK-NEXT:   f: 81 fb 00 00 00 00                    cmpl $0, %ebx
+// CHECK-NEXT:  15: 81 3c 25 00 00 00 00 00 00 00 00     cmpl $0, 0
+// CHECK-NEXT:  20: 48 81 fb 00 00 00 00                 cmpq $0, %rbx
+// CHECK-NEXT:  27: 48 81 3c 25 00 00 00 00 00 00 00 00  cmpq $0, 0
+        .section cmp,"x"
         cmp  $foo, %bx
         cmpw $foo, bar
         cmp  $foo, %ebx
diff --git a/test/MC/ELF/relax-arith2.s b/test/MC/ELF/relax-arith2.s
new file mode 100644
index 0000000..a6c55ad
--- /dev/null
+++ b/test/MC/ELF/relax-arith2.s
@@ -0,0 +1,118 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-objdump -d - | FileCheck  %s
+
+// Test that we avoid relaxing these instructions and instead generate versions
+// that use 8-bit immediate values.
+
+bar:
+// CHECK:      Disassembly of section imul:
+// CHECK-NEXT: imul:
+// CHECK-NEXT:   0: 66 6b db 80                   imulw $-128, %bx, %bx
+// CHECK-NEXT:   4: 66 6b 1c 25 00 00 00 00 7f    imulw $127, 0, %bx
+// CHECK-NEXT:   d: 6b db 00                      imull $0, %ebx, %ebx
+// CHECK-NEXT:  10: 6b 1c 25 00 00 00 00 01       imull $1, 0, %ebx
+// CHECK-NEXT:  18: 48 6b db ff                   imulq $-1, %rbx, %rbx
+// CHECK-NEXT:  1c: 48 6b 1c 25 00 00 00 00 2a    imulq $42, 0, %rbx
+        .section imul,"x"
+        imul $-128, %bx,  %bx
+        imul $127, bar,  %bx
+        imul $0, %ebx, %ebx
+        imul $1, bar,  %ebx
+        imul $-1, %rbx, %rbx
+        imul $42, bar,  %rbx
+
+
+// CHECK:      Disassembly of section and:
+// CHECK-NEXT: and:
+// CHECK-NEXT:   0: 66 83 e3 7f                   andw $127, %bx
+// CHECK-NEXT:   4: 66 83 24 25 00 00 00 00 00    andw $0, 0
+// CHECK-NEXT:   d: 83 e3 01                      andl $1, %ebx
+// CHECK-NEXT:  10: 83 24 25 00 00 00 00 ff       andl $-1, 0
+// CHECK-NEXT:  18: 48 83 e3 2a                   andq $42, %rbx
+// CHECK-NEXT:  1c: 48 83 24 25 00 00 00 00 80    andq $-128, 0
+        .section and,"x"
+        and  $127, %bx
+        andw $0, bar
+        and  $1, %ebx
+        andl $-1, bar
+        and  $42, %rbx
+        andq $-128, bar
+
+// CHECK:      Disassembly of section or:
+// CHECK-NEXT: or:
+// CHECK-NEXT:   0: 66 83 cb 00                   orw $0, %bx
+// CHECK-NEXT:   4: 66 83 0c 25 00 00 00 00 01    orw $1, 0
+// CHECK-NEXT:   d: 83 cb ff                      orl $-1, %ebx
+// CHECK-NEXT:  10: 83 0c 25 00 00 00 00 2a       orl $42, 0
+// CHECK-NEXT:  18: 48 83 cb 80                   orq $-128, %rbx
+// CHECK-NEXT:  1c: 48 83 0c 25 00 00 00 00 7f    orq $127, 0
+        .section or,"x"
+        or  $0, %bx
+        orw $1, bar
+        or  $-1, %ebx
+        orl $42, bar
+        or  $-128, %rbx
+        orq $127, bar
+
+// CHECK:      Disassembly of section xor:
+// CHECK-NEXT: xor:
+// CHECK-NEXT:   0: 66 83 f3 01                   xorw $1, %bx
+// CHECK-NEXT:   4: 66 83 34 25 00 00 00 00 ff    xorw $-1, 0
+// CHECK-NEXT:   d: 83 f3 2a                      xorl $42, %ebx
+// CHECK-NEXT:  10: 83 34 25 00 00 00 00 80       xorl $-128, 0
+// CHECK-NEXT:  18: 48 83 f3 7f                   xorq $127, %rbx
+// CHECK-NEXT:  1c: 48 83 34 25 00 00 00 00 00    xorq $0, 0
+        .section xor,"x"
+        xor  $1, %bx
+        xorw $-1, bar
+        xor  $42, %ebx
+        xorl $-128, bar
+        xor  $127, %rbx
+        xorq $0, bar
+
+// CHECK:      Disassembly of section add:
+// CHECK-NEXT: add:
+// CHECK-NEXT:   0: 66 83 c3 ff                   addw $-1, %bx
+// CHECK-NEXT:   4: 66 83 04 25 00 00 00 00 2a    addw $42, 0
+// CHECK-NEXT:   d: 83 c3 80                      addl $-128, %ebx
+// CHECK-NEXT:  10: 83 04 25 00 00 00 00 7f       addl $127, 0
+// CHECK-NEXT:  18: 48 83 c3 00                   addq $0, %rbx
+// CHECK-NEXT:  1c: 48 83 04 25 00 00 00 00 01    addq $1, 0
+        .section add,"x"
+        add  $-1, %bx
+        addw $42, bar
+        add  $-128, %ebx
+        addl $127, bar
+        add  $0, %rbx
+        addq $1, bar
+
+// CHECK:      Disassembly of section sub:
+// CHECK-NEXT: sub:
+// CHECK-NEXT:   0: 66 83 eb 2a                   subw $42, %bx
+// CHECK-NEXT:   4: 66 83 2c 25 00 00 00 00 80    subw $-128, 0
+// CHECK-NEXT:   d: 83 eb 7f                      subl $127, %ebx
+// CHECK-NEXT:  10: 83 2c 25 00 00 00 00 00       subl $0, 0
+// CHECK-NEXT:  18: 48 83 eb 01                   subq $1, %rbx
+// CHECK-NEXT:  1c: 48 83 2c 25 00 00 00 00 ff    subq $-1, 0
+        .section sub,"x"
+        sub  $42, %bx
+        subw $-128, bar
+        sub  $127, %ebx
+        subl $0, bar
+        sub  $1, %rbx
+        subq $-1, bar
+
+// CHECK:      Disassembly of section cmp:
+// CHECK-NEXT: cmp:
+// CHECK-NEXT:   0: 66 83 fb 80                   cmpw $-128, %bx
+// CHECK-NEXT:   4: 66 83 3c 25 00 00 00 00 7f    cmpw $127, 0
+// CHECK-NEXT:   d: 83 fb 00                      cmpl $0, %ebx
+// CHECK-NEXT:  10: 83 3c 25 00 00 00 00 01       cmpl $1, 0
+// CHECK-NEXT:  18: 48 83 fb ff                   cmpq $-1, %rbx
+// CHECK-NEXT:  1c: 48 83 3c 25 00 00 00 00 2a    cmpq $42, 0
+        .section cmp,"x"
+        cmp  $-128, %bx
+        cmpw $127, bar
+        cmp  $0, %ebx
+        cmpl $1, bar
+        cmp  $-1, %rbx
+        cmpq $42, bar
diff --git a/test/MC/ELF/relax-arith3.s b/test/MC/ELF/relax-arith3.s
new file mode 100644
index 0000000..3be8b0e
--- /dev/null
+++ b/test/MC/ELF/relax-arith3.s
@@ -0,0 +1,76 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-objdump -d - | FileCheck  %s
+
+// Test that we correctly relax these instructions into versions that use
+// 16 or 32 bit immediate values.
+
+bar:
+// CHECK:      Disassembly of section imul:
+// CHECK-NEXT: imul:
+// CHECK-NEXT:   0: 66 69 1d 00 00 00 00 00 00        imulw $0, (%rip), %bx
+// CHECK-NEXT:   9: 69 1d 00 00 00 00 00 00 00 00     imull $0, (%rip), %ebx
+// CHECK-NEXT:  13: 48 69 1d 00 00 00 00 00 00 00 00  imulq $0, (%rip), %rbx
+        .section imul,"x"
+        imul $foo, bar(%rip),  %bx
+        imul $foo, bar(%rip),  %ebx
+        imul $foo, bar(%rip),  %rbx
+
+
+// CHECK:      Disassembly of section and:
+// CHECK-NEXT: and:
+// CHECK-NEXT:   0: 66 81 25 00 00 00 00 00 00        andw $0, (%rip)
+// CHECK-NEXT:   9: 81 25 00 00 00 00 00 00 00 00     andl $0, (%rip)
+// CHECK-NEXT:  13: 48 81 25 00 00 00 00 00 00 00 00  andq $0, (%rip)
+        .section and,"x"
+        andw $foo, bar(%rip)
+        andl $foo, bar(%rip)
+        andq $foo, bar(%rip)
+
+// CHECK:      Disassembly of section or:
+// CHECK-NEXT: or:
+// CHECK-NEXT:   0: 66 81 0d 00 00 00 00 00 00        orw $0, (%rip)
+// CHECK-NEXT:   9: 81 0d 00 00 00 00 00 00 00 00     orl $0, (%rip)
+// CHECK-NEXT:  13: 48 81 0d 00 00 00 00 00 00 00 00  orq $0, (%rip)
+        .section or,"x"
+        orw $foo, bar(%rip)
+        orl $foo, bar(%rip)
+        orq $foo, bar(%rip)
+
+// CHECK:      Disassembly of section xor:
+// CHECK-NEXT: xor:
+// CHECK-NEXT:   0: 66 81 35 00 00 00 00 00 00        xorw $0, (%rip)
+// CHECK-NEXT:   9: 81 35 00 00 00 00 00 00 00 00     xorl $0, (%rip)
+// CHECK-NEXT:  13: 48 81 35 00 00 00 00 00 00 00 00  xorq $0, (%rip)
+        .section xor,"x"
+        xorw $foo, bar(%rip)
+        xorl $foo, bar(%rip)
+        xorq $foo, bar(%rip)
+
+// CHECK:      Disassembly of section add:
+// CHECK-NEXT: add:
+// CHECK-NEXT:   0: 66 81 05 00 00 00 00 00 00        addw $0, (%rip)
+// CHECK-NEXT:   9: 81 05 00 00 00 00 00 00 00 00     addl $0, (%rip)
+// CHECK-NEXT:  13: 48 81 05 00 00 00 00 00 00 00 00  addq $0, (%rip)
+        .section add,"x"
+        addw $foo, bar(%rip)
+        addl $foo, bar(%rip)
+        addq $foo, bar(%rip)
+
+// CHECK:      Disassembly of section sub:
+// CHECK-NEXT: sub:
+// CHECK-NEXT:   0: 66 81 2d 00 00 00 00 00 00        subw $0, (%rip)
+// CHECK-NEXT:   9: 81 2d 00 00 00 00 00 00 00 00     subl $0, (%rip)
+// CHECK-NEXT:  13: 48 81 2d 00 00 00 00 00 00 00 00  subq $0, (%rip)
+        .section sub,"x"
+        subw $foo, bar(%rip)
+        subl $foo, bar(%rip)
+        subq $foo, bar(%rip)
+
+// CHECK:      Disassembly of section cmp:
+// CHECK-NEXT: cmp:
+// CHECK-NEXT:   0: 66 81 3d 00 00 00 00 00 00        cmpw $0, (%rip)
+// CHECK-NEXT:   9: 81 3d 00 00 00 00 00 00 00 00     cmpl $0, (%rip)
+// CHECK-NEXT:  13: 48 81 3d 00 00 00 00 00 00 00 00  cmpq $0, (%rip)
+        .section cmp,"x"
+        cmpw $foo, bar(%rip)
+        cmpl $foo, bar(%rip)
+        cmpq $foo, bar(%rip)
diff --git a/test/MC/ELF/symver-pr23914.s b/test/MC/ELF/symver-pr23914.s
new file mode 100644
index 0000000..e8b4325
--- /dev/null
+++ b/test/MC/ELF/symver-pr23914.s
@@ -0,0 +1,16 @@
+// Regression test for PR23914.
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
+
+defined:
+        .symver defined, aaaaaaaaaaaaaaaaaa@@@AAAAAAAAAAAAA
+
+// CHECK:      Symbol {
+// CHECK:        Name: aaaaaaaaaaaaaaaaaa@@AAAAAAAAAAAAA
+// CHECK-NEXT:   Value: 0x0
+// CHECK-NEXT:   Size: 0
+// CHECK-NEXT:   Binding: Local
+// CHECK-NEXT:   Type: None
+// CHECK-NEXT:   Other: 0
+// CHECK-NEXT:   Section: .text
+// CHECK-NEXT: }
+
diff --git a/test/MC/ELF/undef-temp.s b/test/MC/ELF/undef-temp.s
new file mode 100644
index 0000000..45537a9
--- /dev/null
+++ b/test/MC/ELF/undef-temp.s
@@ -0,0 +1,4 @@
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux %s -o - 2>&1 | FileCheck %s
+
+// CHECK: Undefined temporary
+        .long .Lfoo
diff --git a/test/MC/ELF/undef.s b/test/MC/ELF/undef.s
index 9577ea2..47cd85a 100644
--- a/test/MC/ELF/undef.s
+++ b/test/MC/ELF/undef.s
@@ -2,7 +2,6 @@
 
 // Test which symbols should be in the symbol table
 
-        .long	.Lsym1
 .Lsym2:
 .Lsym3:
 .Lsym4 = .Lsym2 - .Lsym3
@@ -42,15 +41,6 @@ test2_b = undef + 1
 // CHECK-NEXT:     Section: .rodata.str1.1
 // CHECK-NEXT:   }
 // CHECK-NEXT:   Symbol {
-// CHECK-NEXT:     Name: .Lsym1
-// CHECK-NEXT:     Value: 0x0
-// CHECK-NEXT:     Size: 0
-// CHECK-NEXT:     Binding: Global
-// CHECK-NEXT:     Type: None
-// CHECK-NEXT:     Other: 0
-// CHECK-NEXT:     Section: Undefined
-// CHECK-NEXT:   }
-// CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: sym6
 // CHECK-NEXT:     Value: 0x0
 // CHECK-NEXT:     Size: 0
diff --git a/test/MC/ELF/undef2.s b/test/MC/ELF/undef2.s
deleted file mode 100644
index 6aa66c0..0000000
--- a/test/MC/ELF/undef2.s
+++ /dev/null
@@ -1,18 +0,0 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
-
-// Test that this produces an undefined reference to .Lfoo
-
-        je	.Lfoo
-
-// CHECK:       Section {
-// CHECK:         Name: .strtab
-
-// CHECK:       Symbol {
-// CHECK:         Name: .Lfoo
-// CHECK-NEXT:    Value:
-// CHECK-NEXT:    Size:
-// CHECK-NEXT:    Binding: Global
-// CHECK-NEXT:    Type:
-// CHECK-NEXT:    Other:
-// CHECK-NEXT:    Section:
-// CHECK-NEXT:  }
diff --git a/test/MC/MachO/ARM/directive-type-diagnostics.s b/test/MC/MachO/ARM/directive-type-diagnostics.s
new file mode 100644
index 0000000..f5f9b45
--- /dev/null
+++ b/test/MC/MachO/ARM/directive-type-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple arm-apple -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple armeb-apple -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumb-apple -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumbeb-apple -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+        .type symbol 32
+// CHECK: error: expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '%<type>' or "<type>"
+// CHECK: .type symbol 32
+// CHECK:              ^
+
diff --git a/test/MC/MachO/cstexpr-gotpcrel-64.ll b/test/MC/MachO/cstexpr-gotpcrel-64.ll
index bf15564..bafddcb 100644
--- a/test/MC/MachO/cstexpr-gotpcrel-64.ll
+++ b/test/MC/MachO/cstexpr-gotpcrel-64.ll
@@ -84,3 +84,12 @@ define i32 @t0(i32 %a) {
 define i32** @t1() {
   ret i32** @bargotequiv
 }
+
+; Do not crash when a pattern cannot be matched as a GOT equivalent
+
+@a = external global i8
+@b = internal unnamed_addr constant i8* @a
+
+; X86-LABEL: _c:
+; X86:   .quad _b
+@c = global i8** @b
diff --git a/test/MC/Mips/branch-pseudos.s b/test/MC/Mips/branch-pseudos.s
index e9b151a..d5b06f7 100644
--- a/test/MC/Mips/branch-pseudos.s
+++ b/test/MC/Mips/branch-pseudos.s
@@ -7,41 +7,41 @@ local_label:
   blt $7, $8, local_label
 # CHECK: slt  $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2a]
 # CHECK: bnez $1, local_label  # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   blt $7, $8, global_label
 # CHECK: slt  $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2a]
 # CHECK: bnez $1, global_label # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   blt $7, $0, local_label
 # CHECK: bltz $7, local_label  # encoding: [0x04,0xe0,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   blt $0, $8, local_label
 # CHECK: bgtz $8, local_label  # encoding: [0x1d,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   blt $0, $0, local_label
 # CHECK: bltz $zero, local_label # encoding: [0x04,0x00,A,A]
-# CHECK:                         #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                         #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
 
   bltu $7, $8, local_label
 # CHECK: sltu $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2b]
 # CHECK: bnez $1, local_label  # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bltu $7, $8, global_label
 # CHECK: sltu $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2b]
 # CHECK: bnez $1, global_label # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bltu $7, $0, local_label
 # CHECK: nop
   bltu $0, $8, local_label
 # CHECK: bnez $8, local_label  # encoding: [0x15,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bltu $0, $0, local_label
 # CHECK: nop
@@ -49,141 +49,141 @@ local_label:
   ble $7, $8, local_label
 # CHECK: slt  $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2a]
 # CHECK: beqz $1, local_label  # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   ble $7, $8, global_label
 # CHECK: slt  $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2a]
 # CHECK: beqz $1, global_label # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   ble $7, $0, local_label
 # CHECK: blez $7, local_label  # encoding: [0x18,0xe0,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   ble $0, $8, local_label
 # CHECK: bgez $8, local_label  # encoding: [0x05,0x01,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   ble $0, $0, local_label
 # WARNING: :[[@LINE-1]]:3: warning: branch is always taken
 # CHECK: blez $zero, local_label # encoding: [0x18,0x00,A,A]
-# CHECK:                         #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                         #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
 
   bleu $7, $8, local_label
 # CHECK: sltu $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2b]
 # CHECK: beqz $1, local_label  # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bleu $7, $8, global_label
 # CHECK: sltu $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2b]
 # CHECK: beqz $1, global_label # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bleu $7, $0, local_label
 # CHECK: beqz $7, local_label  # encoding: [0x10,0xe0,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bleu $0, $8, local_label
 # WARNING: :[[@LINE-1]]:3: warning: branch is always taken
 # CHECK: b  local_label        # encoding: [0x10,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bleu $0, $0, local_label
 # WARNING: :[[@LINE-1]]:3: warning: branch is always taken
 # CHECK: b  local_label        # encoding: [0x10,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
 
   bge $7, $8, local_label
 # CHECK: slt  $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2a]
 # CHECK: beqz $1, local_label  # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bge $7, $8, global_label
 # CHECK: slt  $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2a]
 # CHECK: beqz $1, global_label # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bge $7, $0, local_label
 # CHECK: bgez $7, local_label  # encoding: [0x04,0xe1,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bge $0, $8, local_label
 # CHECK: blez $8, local_label  # encoding: [0x19,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bge $0, $0, local_label
 # WARNING: :[[@LINE-1]]:3: warning: branch is always taken
 # CHECK: bgez $zero, local_label # encoding: [0x04,0x01,A,A]
-# CHECK:                         #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                         #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
 
   bgeu $7, $8, local_label
 # CHECK: sltu $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2b]
 # CHECK: beqz $1, local_label  # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgeu $7, $8, global_label
 # CHECK: sltu $1, $7, $8       # encoding: [0x00,0xe8,0x08,0x2b]
 # CHECK: beqz $1, global_label # encoding: [0x10,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgeu $7, $0, local_label
 # WARNING: :[[@LINE-1]]:3: warning: branch is always taken
 # CHECK: b  local_label        # encoding: [0x10,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgeu $0, $8, local_label
 # CHECK: beqz $8, local_label  # encoding: [0x11,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgeu $0, $0, local_label
 # WARNING: :[[@LINE-1]]:3: warning: branch is always taken
 # CHECK: b  local_label        # encoding: [0x10,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
 
   bgt $7, $8, local_label
 # CHECK: slt  $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2a]
 # CHECK: bnez $1, local_label  # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgt $7, $8, global_label
 # CHECK: slt  $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2a]
 # CHECK: bnez $1, global_label # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgt $7, $0, local_label
 # CHECK: bgtz $7, local_label  # encoding: [0x1c,0xe0,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgt $0, $8, local_label
 # CHECK: bltz $8, local_label  # encoding: [0x05,0x00,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgt $0, $0, local_label
 # CHECK: bgtz  $zero, local_label # encoding: [0x1c,0x00,A,A]
-# CHECK:                          #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                          #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
 
   bgtu $7, $8, local_label
 # CHECK: sltu $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2b]
 # CHECK: bnez $1, local_label  # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgtu $7, $8, global_label
 # CHECK: sltu $1, $8, $7       # encoding: [0x01,0x07,0x08,0x2b]
 # CHECK: bnez $1, global_label # encoding: [0x14,0x20,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: global_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgtu $7, $0, local_label
 # CHECK: bnez $7, local_label  # encoding: [0x14,0xe0,A,A]
-# CHECK:                       #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                       #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
   bgtu $0, $8, local_label
 # CHECK: nop
   bgtu $0, $0, local_label
 # CHECK: bnez $zero, local_label # encoding: [0x14,0x00,A,A]
-# CHECK:                         #   fixup A - offset: 0, value: local_label, kind: fixup_Mips_PC16
+# CHECK:                         #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop
diff --git a/test/MC/Mips/expr1.s b/test/MC/Mips/expr1.s
index 7959315..4af6163 100644
--- a/test/MC/Mips/expr1.s
+++ b/test/MC/Mips/expr1.s
@@ -16,6 +16,11 @@
 # 32R2-EL:                             #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
 # 32R2-EL: lw   $4, %lo(foo+8)($4)     # encoding: [0x08'A',A,0x84,0x8c]
 # 32R2-EL:                             #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# 32R2-EL: lw   $4, 10($4)             # encoding: [0x0a,0x00,0x84,0x8c]
+# 32R2-EL: lw   $4, 15($4)             # encoding: [0x0f,0x00,0x84,0x8c]
+# 32R2-EL: lw   $4, 21($4)             # encoding: [0x15,0x00,0x84,0x8c]
+# 32R2-EL: lw   $4, 28($4)             # encoding: [0x1c,0x00,0x84,0x8c]
+# 32R2-EL: lw   $4, 6($4)              # encoding: [0x06,0x00,0x84,0x8c]
 # 32R2-EL: .space  64
 
 # MM-32R2-EL: .text
@@ -30,6 +35,11 @@
 # MM-32R2-EL:                          #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_MICROMIPS_LO16
 # MM-32R2-EL: lw   $4, %lo(foo+8)($4)  # encoding: [0x84'A',0xfc'A',0x08,0x00]
 # MM-32R2-EL:                          #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_MICROMIPS_LO16
+# MM-32R2-EL: lw   $4, 10($4)          # encoding: [0x84,0xfc,0x0a,0x00]
+# MM-32R2-EL: lw   $4, 15($4)          # encoding: [0x84,0xfc,0x0f,0x00]
+# MM-32R2-EL: lw   $4, 21($4)          # encoding: [0x84,0xfc,0x15,0x00]
+# MM-32R2-EL: lw   $4, 28($4)          # encoding: [0x84,0xfc,0x1c,0x00]
+# MM-32R2-EL: lw   $4, 6($4)           # encoding: [0x84,0xfc,0x06,0x00]
 # MM-32R2-EL: .space  64
 
   .globl  foo
@@ -40,5 +50,10 @@ foo:
   lw  $4,%lo (2 * 4) + foo($4)
   lw  $4,%lo((2 * 4) + foo)($4)
   lw  $4,(((%lo ((2 * 4) + foo))))($4)
+  lw  $4, (((1+2)+3)+4)($4)
+  lw  $4, ((((1+2)+3)+4)+5)($4)
+  lw  $4, (((((1+2)+3)+4)+5)+6)($4)
+  lw  $4, ((((((1+2)+3)+4)+5)+6)+7)($4)
+  lw  $4, (%lo((1+2)+65536)+3)($4)
   .space  64
   .end  foo
diff --git a/test/MC/Mips/micromips32r6/invalid.s b/test/MC/Mips/micromips32r6/invalid.s
new file mode 100644
index 0000000..8ba787a
--- /dev/null
+++ b/test/MC/Mips/micromips32r6/invalid.s
@@ -0,0 +1,6 @@
+# RUN: not llvm-mc %s -triple=mips -show-encoding -mcpu=mips32r6 -mattr=micromips 2>%t1
+# RUN: FileCheck %s < %t1
+
+  break 1024               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  break 1023, 1024         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  ei $32                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/micromips32r6/valid.s b/test/MC/Mips/micromips32r6/valid.s
index 94e19f2..a49622a 100644
--- a/test/MC/Mips/micromips32r6/valid.s
+++ b/test/MC/Mips/micromips32r6/valid.s
@@ -20,11 +20,18 @@
   balc 14572256            # CHECK: balc 14572256       # encoding: [0xb4,0x37,0x96,0xb8]
   bc 14572256              # CHECK: bc 14572256         # encoding: [0x94,0x37,0x96,0xb8]
   bitswap $4, $2           # CHECK: bitswap $4, $2      # encoding: [0x00,0x44,0x0b,0x3c]
+  break                    # CHECK: break               # encoding: [0x00,0x00,0x00,0x07]
+  break 7                  # CHECK: break 7             # encoding: [0x00,0x07,0x00,0x07]
+  break 7, 5               # CHECK: break 7, 5          # encoding: [0x00,0x07,0x01,0x47]
   cache 1, 8($5)           # CHECK: cache 1, 8($5)      # encoding: [0x20,0x25,0x60,0x08]
   clo $11, $a1             # CHECK: clo $11, $5         # encoding: [0x01,0x65,0x4b,0x3c]
   clz $sp, $gp             # CHECK: clz $sp, $gp        # encoding: [0x03,0x80,0xe8,0x50]
   div $3, $4, $5           # CHECK: div $3, $4, $5      # encoding: [0x00,0xa4,0x19,0x18]
   divu $3, $4, $5          # CHECK: divu $3, $4, $5     # encoding: [0x00,0xa4,0x19,0x98]
+  ehb                      # CHECK: ehb                 # encoding: [0x00,0x00,0x18,0x00]
+  ei                       # CHECK: ei                  # encoding: [0x00,0x00,0x57,0x7c]
+  ei $0                    # CHECK: ei                  # encoding: [0x00,0x00,0x57,0x7c]
+  ei $10                   # CHECK: ei $10              # encoding: [0x00,0x0a,0x57,0x7c]
   eret                     # CHECK: eret                # encoding: [0x00,0x00,0xf3,0x7c]
   eretnc                   # CHECK: eretnc              # encoding: [0x00,0x01,0xf3,0x7c]
   jialc $5, 256            # CHECK: jialc $5, 256       # encoding: [0x80,0x05,0x01,0x00]
@@ -37,6 +44,7 @@
   muh $3, $4, $5           # CHECK muh $3, $4, $5       # encoding: [0x00,0xa4,0x18,0x58]
   mulu $3, $4, $5          # CHECK mulu $3, $4, $5      # encoding: [0x00,0xa4,0x18,0x98]
   muhu $3, $4, $5          # CHECK muhu $3, $4, $5      # encoding: [0x00,0xa4,0x18,0xd8]
+  nop                      # CHECK: nop                 # encoding: [0x00,0x00,0x00,0x00]
   nor $3, $4, $5           # CHECK: nor $3, $4, $5      # encoding: [0x00,0xa4,0x1a,0xd0]
   or $3, $4, $5            # CHECK: or $3, $4, $5       # encoding: [0x00,0xa4,0x1a,0x90]
   ori $3, $4, 1234         # CHECK: ori $3, $4, 1234    # encoding: [0x50,0x64,0x04,0xd2]
@@ -45,6 +53,7 @@
   seh $3, $4               # CHECK: seh $3, $4          # encoding: [0x00,0x64,0x3b,0x3c]
   seleqz $2,$3,$4          # CHECK: seleqz $2, $3, $4   # encoding: [0x00,0x83,0x11,0x40]
   selnez $2,$3,$4          # CHECK: selnez $2, $3, $4   # encoding: [0x00,0x83,0x11,0x80]
+  sll $4, $3, 7            # CHECK: sll $4, $3, 7       # encoding: [0x00,0x83,0x38,0x00]
   sub $3, $4, $5           # CHECK: sub $3, $4, $5      # encoding: [0x00,0xa4,0x19,0x90]
   subu $3, $4, $5          # CHECK: subu $3, $4, $5     # encoding: [0x00,0xa4,0x19,0xd0]
   xor $3, $4, $5           # CHECK: xor $3, $4, $5      # encoding: [0x00,0xa4,0x1b,0x10]
diff --git a/test/MC/Mips/mips-cop0-reginfo.s b/test/MC/Mips/mips-cop0-reginfo.s
new file mode 100644
index 0000000..0508a37
--- /dev/null
+++ b/test/MC/Mips/mips-cop0-reginfo.s
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -arch=mips -mcpu=mips32r2 -filetype=obj %s -o - | \
+# RUN:   llvm-readobj -sections -section-data - | \
+# RUN:     FileCheck %s -check-prefix=CHECK
+	mfc0	$16, $15, 1
+	mfc0	$16, $16, 1
+
+
+# Checking for the coprocessor 0's register usage was recorded
+# and emitted.
+# CHECK:  Section {
+# CHECK:     Index: 5
+# CHECK:     Name: .reginfo (27)
+# CHECK:     Type: SHT_MIPS_REGINFO (0x70000006)
+# CHECK:     Flags [ (0x2)
+# CHECK:       SHF_ALLOC (0x2)
+# CHECK:     ]
+# CHECK:     Address: 0x0
+# CHECK:     Offset: 0x50
+# CHECK:     Size: 24
+# CHECK:     Link: 0
+# CHECK:     Info: 0
+# CHECK:     AddressAlignment: 4
+# CHECK:     EntrySize: 24
+# CHECK:     SectionData (
+# CHECK:       0000: 00010000 00018000 00000000 00000000  |................|
+# CHECK:       0010: 00000000 00000000                    |........|
+# CHECK:     )
+# CHECK:   }
diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s
index 6e747c3..416cb5f 100644
--- a/test/MC/Mips/mips-expansions-bad.s
+++ b/test/MC/Mips/mips-expansions-bad.s
@@ -18,11 +18,34 @@
   la $5, symbol
   # N64-ONLY: :[[@LINE-1]]:3: warning: instruction loads the 32-bit address of a 64-bit symbol
   # N32-ONLY-NOT: :[[@LINE-2]]:3: warning: instruction loads the 32-bit address of a 64-bit symbol
-  # 64-BIT: lui $5, %hi(symbol)
-  # 64-BIT: ori $5, $5, %lo(symbol)
   dli $5, 1
   # 32-BIT: :[[@LINE-1]]:3: error: instruction requires a 64-bit architecture
   bne $2, 0x100010001, 1332
   # 32-BIT: :[[@LINE-1]]:3: error: instruction requires a 32-bit immediate
   beq $2, 0x100010001, 1332
   # 32-BIT: :[[@LINE-1]]:3: error: instruction requires a 32-bit immediate
+  .set mips32r6
+  ulhu $5, 0
+  # 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+  # 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
+  .set mips32
+  ulhu $5, 1
+  # 32-BIT-NOT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+  # 64-BIT-NOT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
+  .set mips64r6
+  ulhu $5, 2
+  # 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+  # 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
+
+  .set mips32r6
+  ulw $5, 0
+  # 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+  # 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
+  .set mips32
+  ulw $5, 1
+  # 32-BIT-NOT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+  # 64-BIT-NOT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
+  .set mips64r6
+  ulw $5, 2
+  # 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+  # 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index bae446c..55de6d0 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -1,172 +1,522 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
-# RUN:   FileCheck %s
+# RUN:   FileCheck %s --check-prefix=CHECK-LE
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN:   FileCheck %s --check-prefix=CHECK-BE
 
 # Check that the IAS expands macro instructions in the same way as GAS.
 
 # Load immediate, done by MipsAsmParser::expandLoadImm():
   li $5, 123
-# CHECK:     ori     $5, $zero, 123   # encoding: [0x7b,0x00,0x05,0x34]
+# CHECK-LE:     ori     $5, $zero, 123   # encoding: [0x7b,0x00,0x05,0x34]
   li $6, -2345
-# CHECK:     addiu   $6, $zero, -2345 # encoding: [0xd7,0xf6,0x06,0x24]
+# CHECK-LE:     addiu   $6, $zero, -2345 # encoding: [0xd7,0xf6,0x06,0x24]
   li $7, 65538
-# CHECK:     lui     $7, 1            # encoding: [0x01,0x00,0x07,0x3c]
-# CHECK:     ori     $7, $7, 2        # encoding: [0x02,0x00,0xe7,0x34]
+# CHECK-LE:     lui     $7, 1            # encoding: [0x01,0x00,0x07,0x3c]
+# CHECK-LE:     ori     $7, $7, 2        # encoding: [0x02,0x00,0xe7,0x34]
   li $8, ~7
-# CHECK:     addiu   $8, $zero, -8    # encoding: [0xf8,0xff,0x08,0x24]
+# CHECK-LE:     addiu   $8, $zero, -8    # encoding: [0xf8,0xff,0x08,0x24]
   li $9, 0x10000
-# CHECK:     lui     $9, 1            # encoding: [0x01,0x00,0x09,0x3c]
-# CHECK-NOT: ori $9, $9, 0            # encoding: [0x00,0x00,0x29,0x35]
+# CHECK-LE:     lui     $9, 1            # encoding: [0x01,0x00,0x09,0x3c]
+# CHECK-LE-NOT: ori $9, $9, 0            # encoding: [0x00,0x00,0x29,0x35]
   li $10, ~(0x101010)
-# CHECK:     lui     $10, 65519       # encoding: [0xef,0xff,0x0a,0x3c]
-# CHECK:     ori     $10, $10, 61423  # encoding: [0xef,0xef,0x4a,0x35]
+# CHECK-LE:     lui     $10, 65519       # encoding: [0xef,0xff,0x0a,0x3c]
+# CHECK-LE:     ori     $10, $10, 61423  # encoding: [0xef,0xef,0x4a,0x35]
 
 # Load address, done by MipsAsmParser::expandLoadAddressReg()
 # and MipsAsmParser::expandLoadAddressImm():
   la $4, 20
-# CHECK: ori     $4, $zero, 20       # encoding: [0x14,0x00,0x04,0x34]
+# CHECK-LE: ori     $4, $zero, 20       # encoding: [0x14,0x00,0x04,0x34]
   la $7, 65538
-# CHECK: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
-# CHECK: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
+# CHECK-LE: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
+# CHECK-LE: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
   la $4, 20($5)
-# CHECK: ori     $4, $5, 20          # encoding: [0x14,0x00,0xa4,0x34]
+# CHECK-LE: ori     $4, $5, 20          # encoding: [0x14,0x00,0xa4,0x34]
   la $7, 65538($8)
-# CHECK: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
-# CHECK: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
-# CHECK: addu    $7, $7, $8          # encoding: [0x21,0x38,0xe8,0x00]
+# CHECK-LE: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
+# CHECK-LE: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
+# CHECK-LE: addu    $7, $7, $8          # encoding: [0x21,0x38,0xe8,0x00]
   la $8, 1f
-# CHECK: lui     $8, %hi($tmp0)      # encoding: [A,A,0x08,0x3c]
-# CHECK:                             #   fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
-# CHECK: ori     $8, $8, %lo($tmp0)  # encoding: [A,A,0x08,0x35]
-# CHECK:                             #   fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: lui     $8, %hi($tmp0)      # encoding: [A,A,0x08,0x3c]
+# CHECK-LE:                             #   fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: ori     $8, $8, %lo($tmp0)  # encoding: [A,A,0x08,0x35]
+# CHECK-LE:                             #   fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
   la $8, symbol
-# CHECK: lui     $8, %hi(symbol)     # encoding: [A,A,0x08,0x3c]
-# CHECK:                             #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
-# CHECK: ori     $8, $8, %lo(symbol) # encoding: [A,A,0x08,0x35]
-# CHECK:                             #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: lui     $8, %hi(symbol)     # encoding: [A,A,0x08,0x3c]
+# CHECK-LE:                             #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: ori     $8, $8, %lo(symbol) # encoding: [A,A,0x08,0x35]
+# CHECK-LE:                             #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
   la $8, symbol($9)
-# CHECK: lui  $8, %hi(symbol)        # encoding: [A,A,0x08,0x3c]
-# CHECK:                             #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
-# CHECK: ori  $8, $8, %lo(symbol)    # encoding: [A,A,0x08,0x35]
-# CHECK:                             #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
-# CHECK: addu $8, $8, $9             # encoding: [0x21,0x40,0x09,0x01]
+# CHECK-LE: lui  $8, %hi(symbol)        # encoding: [A,A,0x08,0x3c]
+# CHECK-LE:                             #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: ori  $8, $8, %lo(symbol)    # encoding: [A,A,0x08,0x35]
+# CHECK-LE:                             #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: addu $8, $8, $9             # encoding: [0x21,0x40,0x09,0x01]
+  la $8, symbol($8)
+# CHECK-LE: lui  $1, %hi(symbol)        # encoding: [A,A,0x01,0x3c]
+# CHECK-LE:                             #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: ori  $1, $1, %lo(symbol)    # encoding: [A,A,0x21,0x34]
+# CHECK-LE:                             #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: addu $8, $1, $8             # encoding: [0x21,0x40,0x28,0x00]
+  la $8, 20($8)
+# CHECK-LE: ori  $8, $8, 20             # encoding: [0x14,0x00,0x08,0x35]
+  la $8, 65538($8)
+# CHECK-LE: lui  $1, 1                  # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori  $1, $1, 2              # encoding: [0x02,0x00,0x21,0x34]
+# CHECK-LE: addu $8, $1, $8             # encoding: [0x21,0x40,0x28,0x00]
 
 # LW/SW and LDC1/SDC1 of symbol address, done by MipsAsmParser::expandMemInst():
   .set noat
   lw $10, symbol($4)
-# CHECK: lui     $10, %hi(symbol)        # encoding: [A,A,0x0a,0x3c]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
-# CHECK: addu    $10, $10, $4            # encoding: [0x21,0x50,0x44,0x01]
-# CHECK: lw      $10, %lo(symbol)($10)   # encoding: [A,A,0x4a,0x8d]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: lui     $10, %hi(symbol)        # encoding: [A,A,0x0a,0x3c]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: addu    $10, $10, $4            # encoding: [0x21,0x50,0x44,0x01]
+# CHECK-LE: lw      $10, %lo(symbol)($10)   # encoding: [A,A,0x4a,0x8d]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
   .set at
   sw $10, symbol($9)
-# CHECK: lui     $1, %hi(symbol)         # encoding: [A,A,0x01,0x3c]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
-# CHECK: addu    $1, $1, $9              # encoding: [0x21,0x08,0x29,0x00]
-# CHECK: sw      $10, %lo(symbol)($1)    # encoding: [A,A,0x2a,0xac]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: lui     $1, %hi(symbol)         # encoding: [A,A,0x01,0x3c]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: addu    $1, $1, $9              # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: sw      $10, %lo(symbol)($1)    # encoding: [A,A,0x2a,0xac]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
 
   lw $8, 1f
-# CHECK: lui $8, %hi($tmp0)              # encoding: [A,A,0x08,0x3c]
-# CHECK:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
-# CHECK: lw  $8, %lo($tmp0)($8)          # encoding: [A,A,0x08,0x8d]
-# CHECK:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: lui $8, %hi($tmp0)              # encoding: [A,A,0x08,0x3c]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: lw  $8, %lo($tmp0)($8)          # encoding: [A,A,0x08,0x8d]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
   sw $8, 1f
-# CHECK: lui $1, %hi($tmp0)              # encoding: [A,A,0x01,0x3c]
-# CHECK:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
-# CHECK: sw  $8, %lo($tmp0)($1)          # encoding: [A,A,0x28,0xac]
-# CHECK:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: lui $1, %hi($tmp0)              # encoding: [A,A,0x01,0x3c]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE: sw  $8, %lo($tmp0)($1)          # encoding: [A,A,0x28,0xac]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
 
   lw $10, 655483($4)
-# CHECK: lui     $10, 10                 # encoding: [0x0a,0x00,0x0a,0x3c]
-# CHECK: addu    $10, $10, $4            # encoding: [0x21,0x50,0x44,0x01]
-# CHECK: lw      $10, 123($10)           # encoding: [0x7b,0x00,0x4a,0x8d]
+# CHECK-LE: lui     $10, 10                 # encoding: [0x0a,0x00,0x0a,0x3c]
+# CHECK-LE: addu    $10, $10, $4            # encoding: [0x21,0x50,0x44,0x01]
+# CHECK-LE: lw      $10, 123($10)           # encoding: [0x7b,0x00,0x4a,0x8d]
   sw $10, 123456($9)
-# CHECK: lui     $1, 2                   # encoding: [0x02,0x00,0x01,0x3c]
-# CHECK: addu    $1, $1, $9              # encoding: [0x21,0x08,0x29,0x00]
-# CHECK: sw      $10, 57920($1)          # encoding: [0x40,0xe2,0x2a,0xac]
+# CHECK-LE: lui     $1, 2                   # encoding: [0x02,0x00,0x01,0x3c]
+# CHECK-LE: addu    $1, $1, $9              # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: sw      $10, 57920($1)          # encoding: [0x40,0xe2,0x2a,0xac]
 
   lw $8, symbol
-# CHECK:     lui     $8, %hi(symbol)     # encoding: [A,A,0x08,0x3c]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
-# CHECK-NOT: move    $8, $8              # encoding: [0x21,0x40,0x00,0x01]
-# CHECK:     lw      $8, %lo(symbol)($8) # encoding: [A,A,0x08,0x8d]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE:     lui     $8, %hi(symbol)     # encoding: [A,A,0x08,0x3c]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE-NOT: move    $8, $8              # encoding: [0x21,0x40,0x00,0x01]
+# CHECK-LE:     lw      $8, %lo(symbol)($8) # encoding: [A,A,0x08,0x8d]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
   sw $8, symbol
-# CHECK:     lui     $1, %hi(symbol)     # encoding: [A,A,0x01,0x3c]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
-# CHECK-NOT: move    $1, $1              # encoding: [0x21,0x08,0x20,0x00]
-# CHECK:     sw      $8, %lo(symbol)($1) # encoding: [A,A,0x28,0xac]
-# CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE:     lui     $1, %hi(symbol)     # encoding: [A,A,0x01,0x3c]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK-LE-NOT: move    $1, $1              # encoding: [0x21,0x08,0x20,0x00]
+# CHECK-LE:     sw      $8, %lo(symbol)($1) # encoding: [A,A,0x28,0xac]
+# CHECK-LE:                                 #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
 
   ldc1 $f0, symbol
-# CHECK: lui     $1, %hi(symbol)
-# CHECK: ldc1    $f0, %lo(symbol)($1)
+# CHECK-LE: lui     $1, %hi(symbol)
+# CHECK-LE: ldc1    $f0, %lo(symbol)($1)
   sdc1 $f0, symbol
-# CHECK: lui     $1, %hi(symbol)
-# CHECK: sdc1    $f0, %lo(symbol)($1)
+# CHECK-LE: lui     $1, %hi(symbol)
+# CHECK-LE: sdc1    $f0, %lo(symbol)($1)
 
 # Test BNE with an immediate as the 2nd operand.
   bne $2, 0, 1332
-# CHECK: bnez  $2, 1332          # encoding: [0x4d,0x01,0x40,0x14]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: bnez  $2, 1332          # encoding: [0x4d,0x01,0x40,0x14]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   bne $2, 123, 1332
-# CHECK: ori   $1, $zero, 123    # encoding: [0x7b,0x00,0x01,0x34]
-# CHECK: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: ori   $1, $zero, 123    # encoding: [0x7b,0x00,0x01,0x34]
+# CHECK-LE: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   bne $2, -2345, 1332
-# CHECK: addiu $1, $zero, -2345  # encoding: [0xd7,0xf6,0x01,0x24]
-# CHECK: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: addiu $1, $zero, -2345  # encoding: [0xd7,0xf6,0x01,0x24]
+# CHECK-LE: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   bne $2, 65538, 1332
-# CHECK: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
-# CHECK: ori   $1, $1, 2         # encoding: [0x02,0x00,0x21,0x34]
-# CHECK: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori   $1, $1, 2         # encoding: [0x02,0x00,0x21,0x34]
+# CHECK-LE: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   bne $2, ~7, 1332
-# CHECK: addiu $1, $zero, -8     # encoding: [0xf8,0xff,0x01,0x24]
-# CHECK: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: addiu $1, $zero, -8     # encoding: [0xf8,0xff,0x01,0x24]
+# CHECK-LE: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   bne $2, 0x10000, 1332
-# CHECK: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
-# CHECK: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: bne   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x14]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
 # Test BEQ with an immediate as the 2nd operand.
   beq $2, 0, 1332
-# CHECK: beqz  $2, 1332          # encoding: [0x4d,0x01,0x40,0x10]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: beqz  $2, 1332          # encoding: [0x4d,0x01,0x40,0x10]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   beq $2, 123, 1332
-# CHECK: ori   $1, $zero, 123    # encoding: [0x7b,0x00,0x01,0x34]
-# CHECK: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: ori   $1, $zero, 123    # encoding: [0x7b,0x00,0x01,0x34]
+# CHECK-LE: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   beq $2, -2345, 1332
-# CHECK: addiu $1, $zero, -2345  # encoding: [0xd7,0xf6,0x01,0x24]
-# CHECK: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: addiu $1, $zero, -2345  # encoding: [0xd7,0xf6,0x01,0x24]
+# CHECK-LE: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   beq $2, 65538, 1332
-# CHECK: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
-# CHECK: ori   $1, $1, 2         # encoding: [0x02,0x00,0x21,0x34]
-# CHECK: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori   $1, $1, 2         # encoding: [0x02,0x00,0x21,0x34]
+# CHECK-LE: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   beq $2, ~7, 1332
-# CHECK: addiu $1, $zero, -8     # encoding: [0xf8,0xff,0x01,0x24]
-# CHECK: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: addiu $1, $zero, -8     # encoding: [0xf8,0xff,0x01,0x24]
+# CHECK-LE: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
 
   beq $2, 0x10000, 1332
-# CHECK: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
-# CHECK: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
-# CHECK: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-LE: lui   $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: beq   $2, $1, 1332      # encoding: [0x4d,0x01,0x41,0x10]
+# CHECK-LE: nop                     # encoding: [0x00,0x00,0x00,0x00]
+
+# Test ULHU with immediate operand.
+  ulhu $8, 0
+# CHECK-BE: lbu  $1, 0($zero)      # encoding: [0x90,0x01,0x00,0x00]
+# CHECK-BE: lbu  $8, 1($zero)      # encoding: [0x90,0x08,0x00,0x01]
+# CHECK-BE: sll  $1, $1, 8         # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lbu  $1, 1($zero)      # encoding: [0x01,0x00,0x01,0x90]
+# CHECK-LE: lbu  $8, 0($zero)      # encoding: [0x00,0x00,0x08,0x90]
+# CHECK-LE: sll  $1, $1, 8         # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 2
+# CHECK-BE: lbu  $1, 2($zero)      # encoding: [0x90,0x01,0x00,0x02]
+# CHECK-BE: lbu  $8, 3($zero)      # encoding: [0x90,0x08,0x00,0x03]
+# CHECK-BE: sll  $1, $1, 8         # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lbu  $1, 3($zero)      # encoding: [0x03,0x00,0x01,0x90]
+# CHECK-LE: lbu  $8, 2($zero)      # encoding: [0x02,0x00,0x08,0x90]
+# CHECK-LE: sll  $1, $1, 8         # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x8000
+# CHECK-BE: ori  $1, $zero, 32768  # encoding: [0x34,0x01,0x80,0x00]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: ori  $1, $zero, 32768  # encoding: [0x00,0x80,0x01,0x34]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -0x8000
+# CHECK-BE: lbu  $1, -32768($zero) # encoding: [0x90,0x01,0x80,0x00]
+# CHECK-BE: lbu  $8, -32767($zero) # encoding: [0x90,0x08,0x80,0x01]
+# CHECK-BE: sll  $1, $1, 8         # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lbu  $1, -32767($zero) # encoding: [0x01,0x80,0x01,0x90]
+# CHECK-LE: lbu  $8, -32768($zero) # encoding: [0x00,0x80,0x08,0x90]
+# CHECK-LE: sll  $1, $1, 8         # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x10000
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui  $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x18888
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: ori  $1, $1, 34952     # encoding: [0x34,0x21,0x88,0x88]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui  $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori  $1, $1, 34952     # encoding: [0x88,0x88,0x21,0x34]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -32769
+# CHECK-BE: lui  $1, 65535         # encoding: [0x3c,0x01,0xff,0xff]
+# CHECK-BE: ori  $1, $1, 32767     # encoding: [0x34,0x21,0x7f,0xff]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui  $1, 65535         # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK-LE: ori  $1, $1, 32767     # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 32767
+# CHECK-BE: ori  $1, $zero, 32767  # encoding: [0x34,0x01,0x7f,0xff]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: ori  $1, $zero, 32767  # encoding: [0xff,0x7f,0x01,0x34]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+# Test ULHU with immediate offset and a source register operand.
+  ulhu $8, 0($9)
+# CHECK-BE: lbu  $1, 0($9)         # encoding: [0x91,0x21,0x00,0x00]
+# CHECK-BE: lbu  $8, 1($9)         # encoding: [0x91,0x28,0x00,0x01]
+# CHECK-BE: sll  $1, $1, 8         # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lbu  $1, 1($9)         # encoding: [0x01,0x00,0x21,0x91]
+# CHECK-LE: lbu  $8, 0($9)         # encoding: [0x00,0x00,0x28,0x91]
+# CHECK-LE: sll  $1, $1, 8         # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 2($9)
+# CHECK-BE: lbu  $1, 2($9)         # encoding: [0x91,0x21,0x00,0x02]
+# CHECK-BE: lbu  $8, 3($9)         # encoding: [0x91,0x28,0x00,0x03]
+# CHECK-BE: sll  $1, $1, 8         # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lbu  $1, 3($9)         # encoding: [0x03,0x00,0x21,0x91]
+# CHECK-LE: lbu  $8, 2($9)         # encoding: [0x02,0x00,0x28,0x91]
+# CHECK-LE: sll  $1, $1, 8         # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x8000($9)
+# CHECK-BE: ori  $1, $zero, 32768  # encoding: [0x34,0x01,0x80,0x00]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: ori  $1, $zero, 32768  # encoding: [0x00,0x80,0x01,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -0x8000($9)
+# CHECK-BE: lbu  $1, -32768($9)    # encoding: [0x91,0x21,0x80,0x00]
+# CHECK-BE: lbu  $8, -32767($9)    # encoding: [0x91,0x28,0x80,0x01]
+# CHECK-BE: sll  $1, $1, 8         # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lbu  $1, -32767($9)    # encoding: [0x01,0x80,0x21,0x91]
+# CHECK-LE: lbu  $8, -32768($9)    # encoding: [0x00,0x80,0x28,0x91]
+# CHECK-LE: sll  $1, $1, 8         # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x10000($9)
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui  $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x18888($9)
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: ori  $1, $1, 34952     # encoding: [0x34,0x21,0x88,0x88]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui  $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori  $1, $1, 34952     # encoding: [0x88,0x88,0x21,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -32769($9)
+# CHECK-BE: lui  $1, 65535         # encoding: [0x3c,0x01,0xff,0xff]
+# CHECK-BE: ori  $1, $1, 32767     # encoding: [0x34,0x21,0x7f,0xff]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui  $1, 65535         # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK-LE: ori  $1, $1, 32767     # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 32767($9)
+# CHECK-BE: ori  $1, $zero, 32767  # encoding: [0x34,0x01,0x7f,0xff]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lbu  $8, 0($1)         # encoding: [0x90,0x28,0x00,0x00]
+# CHECK-BE: lbu  $1, 1($1)         # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll  $8, $8, 8         # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or   $8, $8, $1        # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: ori  $1, $zero, 32767  # encoding: [0xff,0x7f,0x01,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lbu  $8, 1($1)         # encoding: [0x01,0x00,0x28,0x90]
+# CHECK-LE: lbu  $1, 0($1)         # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll  $8, $8, 8         # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or   $8, $8, $1        # encoding: [0x25,0x40,0x01,0x01]
+
+# Test ULW with immediate operand.
+  ulw $8, 0
+# CHECK-BE: lwl  $8, 0($zero)      # encoding: [0x88,0x08,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($zero)      # encoding: [0x98,0x08,0x00,0x03]
+# CHECK-LE: lwl $8, 3($zero)       # encoding: [0x03,0x00,0x08,0x88]
+# CHECK-LE: lwr $8, 0($zero)       # encoding: [0x00,0x00,0x08,0x98]
+
+  ulw $8, 2
+# CHECK-BE: lwl  $8, 2($zero)      # encoding: [0x88,0x08,0x00,0x02]
+# CHECK-BE: lwr  $8, 5($zero)      # encoding: [0x98,0x08,0x00,0x05]
+# CHECK-LE: lwl $8, 5($zero)       # encoding: [0x05,0x00,0x08,0x88]
+# CHECK-LE: lwr $8, 2($zero)       # encoding: [0x02,0x00,0x08,0x98]
+
+  ulw $8, 0x8000
+# CHECK-BE: ori  $1, $zero, 32768  # encoding: [0x34,0x01,0x80,0x00]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: ori $1, $zero, 32768   # encoding: [0x00,0x80,0x01,0x34]
+# CHECK-LE: lwl $8, 3($1)          # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr $8, 0($1)          # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -0x8000
+# CHECK-BE: lwl  $8, -32768($zero) # encoding: [0x88,0x08,0x80,0x00]
+# CHECK-BE: lwr  $8, -32765($zero) # encoding: [0x98,0x08,0x80,0x03]
+# CHECK-LE: lwl $8, -32765($zero)  # encoding: [0x03,0x80,0x08,0x88]
+# CHECK-LE: lwr $8, -32768($zero)  # encoding: [0x00,0x80,0x08,0x98]
+
+  ulw $8, 0x10000
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: lui $1, 1              # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: lwl $8, 3($1)          # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr $8, 0($1)          # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, 0x18888
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: ori  $1, $1, 34952     # encoding: [0x34,0x21,0x88,0x88]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: lui $1, 1              # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori $1, $1, 34952      # encoding: [0x88,0x88,0x21,0x34]
+# CHECK-LE: lwl $8, 3($1)          # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr $8, 0($1)          # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -32771
+# CHECK-BE: lui  $1, 65535         # encoding: [0x3c,0x01,0xff,0xff]
+# CHECK-BE: ori  $1, $1, 32765     # encoding: [0x34,0x21,0x7f,0xfd]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: lui $1, 65535          # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK-LE: ori $1, $1, 32765      # encoding: [0xfd,0x7f,0x21,0x34]
+# CHECK-LE: lwl $8, 3($1)          # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr $8, 0($1)          # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, 32765
+# CHECK-BE: ori  $1, $zero, 32765  # encoding: [0x34,0x01,0x7f,0xfd]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: ori $1, $zero, 32765   # encoding: [0xfd,0x7f,0x01,0x34]
+# CHECK-LE: lwl $8, 3($1)          # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr $8, 0($1)          # encoding: [0x00,0x00,0x28,0x98]
+
+# Test ULW with immediate offset and a source register operand.
+  ulw $8, 0($9)
+# CHECK-BE: lwl  $8, 0($9)         # encoding: [0x89,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($9)         # encoding: [0x99,0x28,0x00,0x03]
+# CHECK-LE: lwl  $8, 3($9)         # encoding: [0x03,0x00,0x28,0x89]
+# CHECK-LE: lwr  $8, 0($9)         # encoding: [0x00,0x00,0x28,0x99]
+
+  ulw $8, 2($9)
+# CHECK-BE: lwl  $8, 2($9)         # encoding: [0x89,0x28,0x00,0x02]
+# CHECK-BE: lwr  $8, 5($9)         # encoding: [0x99,0x28,0x00,0x05]
+# CHECK-LE: lwl  $8, 5($9)         # encoding: [0x05,0x00,0x28,0x89]
+# CHECK-LE: lwr  $8, 2($9)         # encoding: [0x02,0x00,0x28,0x99]
+
+  ulw $8, 0x8000($9)
+# CHECK-BE: ori  $1, $zero, 32768  # encoding: [0x34,0x01,0x80,0x00]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: ori  $1, $zero, 32768  # encoding: [0x00,0x80,0x01,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lwl  $8, 3($1)         # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr  $8, 0($1)         # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -0x8000($9)
+# CHECK-BE: lwl  $8, -32768($9)    # encoding: [0x89,0x28,0x80,0x00]
+# CHECK-BE: lwr  $8, -32765($9)    # encoding: [0x99,0x28,0x80,0x03]
+# CHECK-LE: lwl  $8, -32765($9)    # encoding: [0x03,0x80,0x28,0x89]
+# CHECK-LE: lwr  $8, -32768($9)    # encoding: [0x00,0x80,0x28,0x99]
+
+  ulw $8, 0x10000($9)
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: lui  $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lwl  $8, 3($1)         # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr  $8, 0($1)         # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, 0x18888($9)
+# CHECK-BE: lui  $1, 1             # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: ori  $1, $1, 34952     # encoding: [0x34,0x21,0x88,0x88]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: lui  $1, 1             # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori  $1, $1, 34952     # encoding: [0x88,0x88,0x21,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lwl  $8, 3($1)         # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr  $8, 0($1)         # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -32771($9)
+# CHECK-BE: lui  $1, 65535         # encoding: [0x3c,0x01,0xff,0xff]
+# CHECK-BE: ori  $1, $1, 32765     # encoding: [0x34,0x21,0x7f,0xfd]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: lui  $1, 65535         # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK-LE: ori  $1, $1, 32765     # encoding: [0xfd,0x7f,0x21,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lwl  $8, 3($1)         # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr  $8, 0($1)         # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, 32765($9)
+# CHECK-BE: ori  $1, $zero, 32765  # encoding: [0x34,0x01,0x7f,0xfd]
+# CHECK-BE: addu $1, $1, $9        # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lwl  $8, 0($1)         # encoding: [0x88,0x28,0x00,0x00]
+# CHECK-BE: lwr  $8, 3($1)         # encoding: [0x98,0x28,0x00,0x03]
+# CHECK-LE: ori  $1, $zero, 32765  # encoding: [0xfd,0x7f,0x01,0x34]
+# CHECK-LE: addu $1, $1, $9        # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lwl  $8, 3($1)         # encoding: [0x03,0x00,0x28,0x88]
+# CHECK-LE: lwr  $8, 0($1)         # encoding: [0x00,0x00,0x28,0x98]
 
 1:
   add $4, $4, $4
diff --git a/test/MC/Mips/mips-fpu-instructions.s b/test/MC/Mips/mips-fpu-instructions.s
index bfaef9e..7c49717 100644
--- a/test/MC/Mips/mips-fpu-instructions.s
+++ b/test/MC/Mips/mips-fpu-instructions.s
@@ -139,7 +139,7 @@
 # FP move instructions
 #------------------------------------------------------------------------------
 # CHECK: bc1f    $BB_1                 # encoding: [A,A,0x00,0x45]
-# CHECK: #   fixup A - offset: 0, value: ($BB_1), kind: fixup_Mips_PC16
+# CHECK: #   fixup A - offset: 0, value: ($BB_1)-4, kind: fixup_Mips_PC16
 
 # CHECK:  cfc1    $6, $0               # encoding: [0x00,0x00,0x46,0x44]
 # CHECK:  ctc1    $10, $31             # encoding: [0x00,0xf8,0xca,0x44]
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index 3765044..fbe1551 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -89,7 +89,7 @@ a:
         madd      $zero,$9
         maddu     $s3,$gp
         maddu     $24,$s2
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1        # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhi      $s3
         mfhi      $sp
@@ -112,7 +112,7 @@ a:
         movz.s    $f25,$f7,$v1
         msub      $s7,$k1
         msubu     $15,$a1
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1             # CHECK: mtc0 $9, $15, 1        # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthi      $s1
         mtlo      $sp
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index ee7af3f..2e4366a 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -103,7 +103,7 @@ a:
         madd.s    $f1,$f31,$f19,$f25
         maddu     $s3,$gp
         maddu     $24,$s2
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1      # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhc1     $s8,$f24
         mfhi      $s3
@@ -129,7 +129,7 @@ a:
         msub.d    $f10,$f1,$f31,$f18
         msub.s    $f12,$f19,$f10,$f16
         msubu     $15,$a1
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1             # CHECK: mtc0 $9, $15, 1     # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthc1     $zero,$f16
         mthi      $s1
diff --git a/test/MC/Mips/mips32r3/valid.s b/test/MC/Mips/mips32r3/valid.s
index 0a4e5b1..f6ef1d3 100644
--- a/test/MC/Mips/mips32r3/valid.s
+++ b/test/MC/Mips/mips32r3/valid.s
@@ -103,7 +103,7 @@ a:
         madd.s    $f1,$f31,$f19,$f25
         maddu     $s3,$gp
         maddu     $24,$s2
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1        # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhc1     $s8,$f24
         mfhi      $s3
@@ -129,7 +129,7 @@ a:
         msub.d    $f10,$f1,$f31,$f18
         msub.s    $f12,$f19,$f10,$f16
         msubu     $15,$a1
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1             # CHECK: mtc0 $9, $15, 1        # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthc1     $zero,$f16
         mthi      $s1
diff --git a/test/MC/Mips/mips32r5/valid.s b/test/MC/Mips/mips32r5/valid.s
index 036b908..f12d751 100644
--- a/test/MC/Mips/mips32r5/valid.s
+++ b/test/MC/Mips/mips32r5/valid.s
@@ -103,7 +103,7 @@ a:
         madd.s    $f1,$f31,$f19,$f25
         maddu     $s3,$gp
         maddu     $24,$s2
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1        # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhc1     $s8,$f24
         mfhi      $s3
@@ -129,7 +129,7 @@ a:
         msub.d    $f10,$f1,$f31,$f18
         msub.s    $f12,$f19,$f10,$f16
         msubu     $15,$a1
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1             # CHECK: mtc0 $9, $15, 1        # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthc1     $zero,$f16
         mthi      $s1
diff --git a/test/MC/Mips/mips32r6/relocations.s b/test/MC/Mips/mips32r6/relocations.s
index 13b3387..eda7497 100644
--- a/test/MC/Mips/mips32r6/relocations.s
+++ b/test/MC/Mips/mips32r6/relocations.s
@@ -10,22 +10,22 @@
 # CHECK-FIXUP:                    value: bar, kind: fixup_MIPS_PC19_S2
 # CHECK-FIXUP: beqc $5, $6, bar # encoding: [0x20,0xa6,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_Mips_PC16
 # CHECK-FIXUP: bnec $5, $6, bar # encoding: [0x60,0xa6,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_Mips_PC16
 # CHECK-FIXUP: beqzc $9, bar    # encoding: [0xd9,0b001AAAAA,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC21_S2
 # CHECK-FIXUP: bnezc $9, bar    # encoding: [0xf9,0b001AAAAA,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC21_S2
 # CHECK-FIXUP: balc  bar        # encoding: [0b111010AA,A,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC26_S2
 # CHECK-FIXUP: bc    bar        # encoding: [0b110010AA,A,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC26_S2
 # CHECK-FIXUP: aluipc $2, %pcrel_hi(bar)    # encoding: [0xec,0x5f,A,A]
 # CHECK-FIXUP:                              #   fixup A - offset: 0,
 # CHECK-FIXUP:                                  value: bar@PCREL_HI16,
diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s
index 2c3a5b2..52752c5 100644
--- a/test/MC/Mips/mips32r6/valid.s
+++ b/test/MC/Mips/mips32r6/valid.s
@@ -108,8 +108,10 @@ a:
         lsa     $2, $3, $4, 3    # CHECK: lsa  $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5]
         lwpc    $2,268           # CHECK: lwpc $2, 268     # encoding: [0xec,0x48,0x00,0x43]
         lwupc   $2,268           # CHECK: lwupc $2, 268    # encoding: [0xec,0x50,0x00,0x43]
+        mfc0    $8,$15,1         # CHECK: mfc0 $8, $15, 1  # encoding: [0x40,0x08,0x78,0x01]
         mod     $2,$3,$4         # CHECK: mod $2, $3, $4   # encoding: [0x00,0x64,0x10,0xda]
         modu    $2,$3,$4         # CHECK: modu $2, $3, $4  # encoding: [0x00,0x64,0x10,0xdb]
+        mtc0    $9,$15,1         # CHECK: mtc0 $9, $15, 1  # encoding: [0x40,0x89,0x78,0x01]
         mul     $2,$3,$4         # CHECK: mul $2, $3, $4   # encoding: [0x00,0x64,0x10,0x98]
         muh     $2,$3,$4         # CHECK: muh $2, $3, $4   # encoding: [0x00,0x64,0x10,0xd8]
         mulu    $2,$3,$4         # CHECK: mulu $2, $3, $4  # encoding: [0x00,0x64,0x10,0x99]
diff --git a/test/MC/Mips/mips64-expansions.s b/test/MC/Mips/mips64-expansions.s
index 620793a..a66a520 100644
--- a/test/MC/Mips/mips64-expansions.s
+++ b/test/MC/Mips/mips64-expansions.s
@@ -271,3 +271,183 @@
 # CHECK: ori  $1, $1, 65535         # encoding: [0xff,0xff,0x21,0x34]
 # CHECK: beq  $2, $1, 1332          # encoding: [0x4d,0x01,0x41,0x10]
 # CHECK: nop                        # encoding: [0x00,0x00,0x00,0x00]
+
+# Test ulhu with 64-bit immediate addresses.
+  ulhu $8, 0x100010001
+# CHECK: lui  $1, 1            # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori  $1, $1, 1        # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll $1, $1, 16       # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 1        # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: lbu  $8, 1($1)        # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu  $1, 0($1)        # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll  $8, $8, 8        # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or   $8, $8, $1       # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x1000100010001
+# CHECK: lui  $1, 1            # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori  $1, $1, 1        # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll $1, $1, 16       # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 1        # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll $1, $1, 16       # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 1        # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: lbu  $8, 1($1)        # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu  $1, 0($1)        # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll  $8, $8, 8        # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or   $8, $8, $1       # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -0x100010001
+# CHECK: lui  $1, 65535        # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori  $1, $1, 65534    # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16       # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65534    # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16       # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65535    # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: lbu  $8, 1($1)        # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu  $1, 0($1)        # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll  $8, $8, 8        # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or   $8, $8, $1       # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -0x1000100010001
+# CHECK: lui  $1, 65534        # encoding: [0xfe,0xff,0x01,0x3c]
+# CHECK: ori  $1, $1, 65534    # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16       # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65534    # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16       # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65535    # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: lbu  $8, 1($1)        # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu  $1, 0($1)        # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll  $8, $8, 8        # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or   $8, $8, $1       # encoding: [0x25,0x40,0x01,0x01]
+
+# Test ulhu with source register and 64-bit immediate offset.
+  ulhu $8, 0x100010001($9)
+# CHECK: lui   $1, 1           # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori   $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll  $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: daddu $1, $1, $9      # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lbu   $8, 1($1)       # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu   $1, 0($1)       # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll   $8, $8, 8       # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or    $8, $8, $1      # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, 0x1000100010001($9)
+# CHECK: lui   $1, 1           # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori   $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll  $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll  $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: daddu $1, $1, $9      # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lbu   $8, 1($1)       # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu   $1, 0($1)       # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll   $8, $8, 8       # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or    $8, $8, $1      # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -0x100010001($9)
+# CHECK: lui   $1, 65535       # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori   $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65535   # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: daddu $1, $1, $9      # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lbu   $8, 1($1)       # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu   $1, 0($1)       # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll   $8, $8, 8       # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or    $8, $8, $1      # encoding: [0x25,0x40,0x01,0x01]
+
+  ulhu $8, -0x1000100010001($9)
+# CHECK: lui   $1, 65534       # encoding: [0xfe,0xff,0x01,0x3c]
+# CHECK: ori   $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65535   # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: daddu $1, $1, $9      # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lbu   $8, 1($1)       # encoding: [0x01,0x00,0x28,0x90]
+# CHECK: lbu   $1, 0($1)       # encoding: [0x00,0x00,0x21,0x90]
+# CHECK: sll   $8, $8, 8       # encoding: [0x00,0x42,0x08,0x00]
+# CHECK: or    $8, $8, $1      # encoding: [0x25,0x40,0x01,0x01]
+
+# Test ulw with 64-bit immediate addresses.
+  ulw $8, 0x100010001
+# CHECK: lui  $1, 1           # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori  $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: lwl  $8, 3($1)       # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr  $8, 0($1)       # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, 0x1000100010001
+# CHECK: lui  $1, 1           # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori  $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 1       # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: lwl  $8, 3($1)       # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr  $8, 0($1)       # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -0x100010001
+# CHECK: lui  $1, 65535       # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori  $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65535   # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: lwl  $8, 3($1)       # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr  $8, 0($1)       # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -0x1000100010001
+# CHECK: lui  $1, 65534       # encoding: [0xfe,0xff,0x01,0x3c]
+# CHECK: ori  $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65534   # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll $1, $1, 16      # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori  $1, $1, 65535   # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: lwl  $8, 3($1)       # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr  $8, 0($1)       # encoding: [0x00,0x00,0x28,0x98]
+
+# Test ulw with source register and 64-bit immediate offset.
+  ulw $8, 0x100010001($9)
+# CHECK: lui   $1, 1          # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori   $1, $1, 1      # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll  $1, $1, 16     # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 1      # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: daddu $1, $1, $9     # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lwl   $8, 3($1)      # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr   $8, 0($1)      # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, 0x1000100010001($9)
+# CHECK: lui   $1, 1          # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: ori   $1, $1, 1      # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll  $1, $1, 16     # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 1      # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: dsll  $1, $1, 16     # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 1      # encoding: [0x01,0x00,0x21,0x34]
+# CHECK: daddu $1, $1, $9     # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lwl   $8, 3($1)      # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr   $8, 0($1)      # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -0x100010001($9)
+# CHECK: lui   $1, 65535      # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori   $1, $1, 65534  # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16     # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65534  # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16     # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65535  # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: daddu $1, $1, $9     # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lwl   $8, 3($1)      # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr   $8, 0($1)      # encoding: [0x00,0x00,0x28,0x98]
+
+  ulw $8, -0x1000100010001($9)
+# CHECK: lui   $1, 65534      # encoding: [0xfe,0xff,0x01,0x3c]
+# CHECK: ori   $1, $1, 65534  # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16     # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65534  # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: dsll  $1, $1, 16     # encoding: [0x38,0x0c,0x01,0x00]
+# CHECK: ori   $1, $1, 65535  # encoding: [0xff,0xff,0x21,0x34]
+# CHECK: daddu $1, $1, $9     # encoding: [0x2d,0x08,0x29,0x00]
+# CHECK: lwl   $8, 3($1)      # encoding: [0x03,0x00,0x28,0x88]
+# CHECK: lwr   $8, 0($1)      # encoding: [0x00,0x00,0x28,0x98]
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index 0bb9f17..03ea6c1 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -82,7 +82,9 @@ a:
         div.d     $f29,$f20,$f27
         div.s     $f4,$f5,$f15
         divu      $zero,$25,$15
+        dmfc0     $10, $16, 2          # CHECK: dmfc0 $10, $16, 2           # encoding: [0x40,0x2a,0x80,0x02]
         dmfc1     $12,$f13
+        dmtc0     $4, $10, 0           # CHECK: dmtc0 $4, $10, 0            # encoding: [0x40,0xa4,0x50,0x00]
         dmtc1     $s0,$f14
         dmult     $s7,$9
         dmultu    $a1,$a2
@@ -154,7 +156,7 @@ a:
         maddu     $24,$s2
         madd.d    $f18, $f22, $f26, $f20  # encoding: [0x4e,0xd4,0xd4,0xa1]
         madd.s    $f2, $f30, $f18, $f24   # encoding: [0x4f,0xd8,0x90,0xa0]
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1        # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhi      $s3
         mfhi      $sp
@@ -181,7 +183,7 @@ a:
         msubu     $15,$a1
         msub.d    $f10, $f2, $f30, $f18   # encoding: [0x4c,0x52,0xf2,0xa9]
         msub.s    $f12, $f18, $f10, $f16  # encoding: [0x4e,0x50,0x53,0x28]
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1                # CHECK: mtc0 $9, $15, 1     # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthi      $s1
         mtlo      $sp
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index aad8e28..37753ae 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -84,7 +84,9 @@ a:
         div.d     $f29,$f20,$f27
         div.s     $f4,$f5,$f15
         divu      $zero,$25,$15
+        dmfc0     $10,$16,2            # CHECK: dmfc0 $10, $16, 2           # encoding: [0x40,0x2a,0x80,0x02]
         dmfc1     $12,$f13
+        dmtc0     $4,$10,0             # CHECK: dmtc0 $4, $10, 0            # encoding: [0x40,0xa4,0x50,0x00]
         dmtc1     $s0,$f14
         dmult     $s7,$9
         dmultu    $a1,$a2
@@ -169,7 +171,7 @@ a:
         madd.s    $f1,$f31,$f19,$f25
         maddu     $s3,$gp
         maddu     $24,$s2
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1        # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhc1     $s8,$f24
         mfhi      $s3
@@ -196,7 +198,7 @@ a:
         msub      $s7,$k1
         msub.s    $f12,$f19,$f10,$f16
         msubu     $15,$a1
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1             # CHECK: mtc0 $9, $15, 1     # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthc1     $zero,$f16
         mthi      $s1
diff --git a/test/MC/Mips/mips64r3/valid.s b/test/MC/Mips/mips64r3/valid.s
index 3592055..c5d4848 100644
--- a/test/MC/Mips/mips64r3/valid.s
+++ b/test/MC/Mips/mips64r3/valid.s
@@ -84,7 +84,9 @@ a:
         div.d     $f29,$f20,$f27
         div.s     $f4,$f5,$f15
         divu      $zero,$25,$15
+        dmfc0     $10, $16, 2          # CHECK: dmfc0 $10, $16, 2           # encoding: [0x40,0x2a,0x80,0x02]
         dmfc1     $12,$f13
+        dmtc0     $4, $10, 0           # CHECK: dmtc0 $4, $10, 0            # encoding: [0x40,0xa4,0x50,0x00]
         dmtc1     $s0,$f14
         dmult     $s7,$9
         dmultu    $a1,$a2
@@ -169,7 +171,7 @@ a:
         madd.s    $f1,$f31,$f19,$f25
         maddu     $s3,$gp
         maddu     $24,$s2
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1        # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhc1     $s8,$f24
         mfhi      $s3
@@ -196,7 +198,7 @@ a:
         msub      $s7,$k1
         msub.s    $f12,$f19,$f10,$f16
         msubu     $15,$a1
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1             # CHECK: mtc0 $9, $15, 1        # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthc1     $zero,$f16
         mthi      $s1
diff --git a/test/MC/Mips/mips64r5/valid.s b/test/MC/Mips/mips64r5/valid.s
index 8446630..d4e52dc 100644
--- a/test/MC/Mips/mips64r5/valid.s
+++ b/test/MC/Mips/mips64r5/valid.s
@@ -84,7 +84,9 @@ a:
         div.d     $f29,$f20,$f27
         div.s     $f4,$f5,$f15
         divu      $zero,$25,$15
+        dmfc0     $10, $16, 2          # CHECK: dmfc0 $10, $16, 2           # encoding: [0x40,0x2a,0x80,0x02]
         dmfc1     $12,$f13
+        dmtc0     $4, $10, 0           # CHECK: dmtc0 $4, $10, 0            # encoding: [0x40,0xa4,0x50,0x00]
         dmtc1     $s0,$f14
         dmult     $s7,$9
         dmultu    $a1,$a2
@@ -169,7 +171,7 @@ a:
         madd.s    $f1,$f31,$f19,$f25
         maddu     $s3,$gp
         maddu     $24,$s2
-        mfc0      $a2,$14,1
+        mfc0      $8,$15,1             # CHECK: mfc0 $8, $15, 1        # encoding: [0x40,0x08,0x78,0x01]
         mfc1      $a3,$f27
         mfhc1     $s8,$f24
         mfhi      $s3
@@ -196,7 +198,7 @@ a:
         msub      $s7,$k1
         msub.s    $f12,$f19,$f10,$f16
         msubu     $15,$a1
-        mtc0      $9,$29,3
+        mtc0      $9,$15,1             # CHECK: mtc0 $9, $15, 1        # encoding: [0x40,0x89,0x78,0x01]
         mtc1      $s8,$f9
         mthc1     $zero,$f16
         mthi      $s1
diff --git a/test/MC/Mips/mips64r6/relocations.s b/test/MC/Mips/mips64r6/relocations.s
index 651ebfb..8374cb8 100644
--- a/test/MC/Mips/mips64r6/relocations.s
+++ b/test/MC/Mips/mips64r6/relocations.s
@@ -1,6 +1,6 @@
-# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 \
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 \
 # RUN:   | FileCheck %s -check-prefix=CHECK-FIXUP
-# RUN: llvm-mc %s -filetype=obj -triple=mips-unknown-linux -mcpu=mips64r6 \
+# RUN: llvm-mc %s -filetype=obj -triple=mips64-unknown-linux -mcpu=mips64r6 \
 # RUN:   | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF
 #------------------------------------------------------------------------------
 # Check that the assembler can handle the documented syntax for fixups.
@@ -10,22 +10,22 @@
 # CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC19_S2
 # CHECK-FIXUP: beqc     $5, $6, bar # encoding: [0x20,0xa6,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_Mips_PC16
 # CHECK-FIXUP: bnec $5, $6, bar # encoding: [0x60,0xa6,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_Mips_PC16
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_Mips_PC16
 # CHECK-FIXUP: beqzc $9, bar    # encoding: [0xd9,0b001AAAAA,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC21_S2
 # CHECK-FIXUP: bnezc $9, bar    # encoding: [0xf9,0b001AAAAA,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC21_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC21_S2
 # CHECK-FIXUP: balc  bar        # encoding: [0b111010AA,A,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC26_S2
 # CHECK-FIXUP: bc    bar        # encoding: [0b110010AA,A,A,A]
 # CHECK-FIXUP:                  #   fixup A - offset: 0,
-# CHECK-FIXUP:                      value: bar, kind: fixup_MIPS_PC26_S2
+# CHECK-FIXUP:                      value: bar-4, kind: fixup_MIPS_PC26_S2
 # CHECK-FIXUP: aluipc $2, %pcrel_hi(bar)    # encoding: [0xec,0x5f,A,A]
 # CHECK-FIXUP:                              #   fixup A - offset: 0,
 # CHECK-FIXUP:                                  value: bar@PCREL_HI16,
@@ -48,18 +48,18 @@
 # Check that the appropriate relocations were created.
 #------------------------------------------------------------------------------
 # CHECK-ELF: Relocations [
-# CHECK-ELF:     0x0 R_MIPS_PC19_S2 bar 0x0
-# CHECK-ELF:     0x4 R_MIPS_PC16 bar 0x0
-# CHECK-ELF:     0x8 R_MIPS_PC16 bar 0x0
-# CHECK-ELF:     0xC R_MIPS_PC21_S2 bar 0x0
-# CHECK-ELF:     0x10 R_MIPS_PC21_S2 bar 0x0
-# CHECK-ELF:     0x14 R_MIPS_PC26_S2 bar 0x0
-# CHECK-ELF:     0x18 R_MIPS_PC26_S2 bar 0x0
-# CHECK-ELF:     0x1C R_MIPS_PCHI16 bar 0x0
-# CHECK-ELF:     0x20 R_MIPS_PCLO16 bar 0x0
-# CHECK-ELF:     0x24 R_MIPS_PC18_S3 bar 0x0
-# CHECK-ELF:     0x28 R_MIPS_PC19_S2 bar 0x0
-# CHECK-ELF:     0x2C R_MIPS_PC19_S2 bar 0x0
+# CHECK-ELF:     0x0 R_MIPS_PC19_S2/R_MIPS_NONE/R_MIPS_NONE bar 0x0
+# CHECK-ELF:     0x4 R_MIPS_PC16/R_MIPS_NONE/R_MIPS_NONE bar 0xFFFFFFFFFFFFFFFC
+# CHECK-ELF:     0x8 R_MIPS_PC16/R_MIPS_NONE/R_MIPS_NONE bar 0xFFFFFFFFFFFFFFFC
+# CHECK-ELF:     0xC R_MIPS_PC21_S2/R_MIPS_NONE/R_MIPS_NONE bar 0xFFFFFFFFFFFFFFFC
+# CHECK-ELF:     0x10 R_MIPS_PC21_S2/R_MIPS_NONE/R_MIPS_NONE bar 0xFFFFFFFFFFFFFFFC
+# CHECK-ELF:     0x14 R_MIPS_PC26_S2/R_MIPS_NONE/R_MIPS_NONE bar 0xFFFFFFFFFFFFFFFC
+# CHECK-ELF:     0x18 R_MIPS_PC26_S2/R_MIPS_NONE/R_MIPS_NONE bar 0xFFFFFFFFFFFFFFFC
+# CHECK-ELF:     0x1C R_MIPS_PCHI16/R_MIPS_NONE/R_MIPS_NONE bar 0x0
+# CHECK-ELF:     0x20 R_MIPS_PCLO16/R_MIPS_NONE/R_MIPS_NONE bar 0x0
+# CHECK-ELF:     0x24 R_MIPS_PC18_S3/R_MIPS_NONE/R_MIPS_NONE bar 0x0
+# CHECK-ELF:     0x28 R_MIPS_PC19_S2/R_MIPS_NONE/R_MIPS_NONE bar 0x0
+# CHECK-ELF:     0x2C R_MIPS_PC19_S2/R_MIPS_NONE/R_MIPS_NONE bar 0x0
 # CHECK-ELF: ]
 
   addiupc   $2,bar
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
index cdcb50b..3dc771a8 100644
--- a/test/MC/Mips/mips64r6/valid.s
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -117,8 +117,10 @@ a:
         div     $2,$3,$4         # CHECK: div $2, $3, $4   # encoding: [0x00,0x64,0x10,0x9a]
         divu    $2,$3,$4         # CHECK: divu $2, $3, $4  # encoding: [0x00,0x64,0x10,0x9b]
         dlsa    $2, $3, $4, 3    # CHECK: dlsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xd5]
+        dmfc0   $10, $16, 2      # CHECK: dmfc0 $10, $16, 2  # encoding: [0x40,0x2a,0x80,0x02]
         dmod    $2,$3,$4         # CHECK: dmod $2, $3, $4  # encoding: [0x00,0x64,0x10,0xde]
         dmodu   $2,$3,$4         # CHECK: dmodu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdf]
+        dmtc0   $4, $10, 0       # CHECK: dmtc0 $4, $10, 0 # encoding: [0x40,0xa4,0x50,0x00]
         dmuh    $2,$3,$4         # CHECK: dmuh $2, $3, $4  # encoding: [0x00,0x64,0x10,0xdc]
         dmuhu   $2,$3,$4         # CHECK: dmuhu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdd]
         dmul    $2,$3,$4         # CHECK: dmul $2, $3, $4  # encoding: [0x00,0x64,0x10,0x9c]
@@ -158,8 +160,10 @@ a:
         min.s   $f0, $f2, $f4    # CHECK: min.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1c]
         mina.d  $f0, $f2, $f4    # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e]
         mina.s  $f0, $f2, $f4    # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e]
+        mfc0    $8,$15,1         # CHECK: mfc0 $8, $15, 1      # encoding: [0x40,0x08,0x78,0x01]
         mod     $2,$3,$4         # CHECK: mod $2, $3, $4   # encoding: [0x00,0x64,0x10,0xda]
         modu    $2,$3,$4         # CHECK: modu $2, $3, $4  # encoding: [0x00,0x64,0x10,0xdb]
+        mtc0    $9,$15,1         # CHECK: mtc0 $9, $15, 1        # encoding: [0x40,0x89,0x78,0x01]
         msubf.d $f2,$f3,$f4      # CHECK: msubf.d $f2, $f3, $f4  # encoding: [0x46,0x24,0x18,0x99]
         msubf.s $f2,$f3,$f4      # CHECK: msubf.s $f2, $f3, $f4  # encoding: [0x46,0x04,0x18,0x99]
         muh     $2,$3,$4         # CHECK: muh $2, $3, $4   # encoding: [0x00,0x64,0x10,0xd8]
diff --git a/test/MC/Mips/mips_abi_flags_xx.s b/test/MC/Mips/mips_abi_flags_xx.s
index cd6c9de..349b70d 100644
--- a/test/MC/Mips/mips_abi_flags_xx.s
+++ b/test/MC/Mips/mips_abi_flags_xx.s
@@ -2,15 +2,15 @@
 # RUN:   FileCheck %s -check-prefix=CHECK-ASM
 #
 # RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
-# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations -mips-abi-flags - | \
 # RUN:     FileCheck %s -check-prefix=CHECK-OBJ -check-prefix=CHECK-OBJ-R1
 
 # RUN: llvm-mc /dev/null -arch=mips -mcpu=mips32 -mattr=fpxx -filetype=obj -o - | \
-# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations -mips-abi-flags - | \
 # RUN:     FileCheck %s -check-prefix=CHECK-OBJ -check-prefix=CHECK-OBJ-R1
 
 # RUN: llvm-mc /dev/null -arch=mips -mcpu=mips32r6 -mattr=fpxx -filetype=obj -o - | \
-# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations -mips-abi-flags - | \
 # RUN:     FileCheck %s -check-prefix=CHECK-OBJ -check-prefix=CHECK-OBJ-R6
 
 # CHECK-ASM: .module fp=xx
@@ -31,12 +31,23 @@
 # CHECK-OBJ:         EntrySize: 24
 # CHECK-OBJ:         Relocations [
 # CHECK-OBJ:         ]
-# CHECK-OBJ:         SectionData (
-# CHECK-OBJ-R1:        0000: 00002001 01010005 00000000 00000000  |.. .............|
-# CHECK-OBJ-R6:        0000: 00002006 01010005 00000000 00000000  |.. .............|
-# CHECK-OBJ:           0010: 00000001 00000000                    |........|
-# CHECK-OBJ:         )
 # CHECK-OBJ-LABEL: }
+# CHECK-OBJ:       MIPS ABI Flags {
+# CHECK-OBJ-NEXT:    Version: 0
+# CHECK-OBJ-R1-NEXT: ISA: {{MIPS32$}}
+# CHECK-OBJ-R6-NEXT: ISA: MIPS32r6
+# CHECK-OBJ-NEXT:    ISA Extension: None (0x0)
+# CHECK-OBJ-NEXT:    ASEs [ (0x0)
+# CHECK-OBJ-NEXT:    ]
+# CHECK-OBJ-NEXT:    FP ABI: Hard float (32-bit CPU, Any FPU) (0x5)
+# CHECK-OBJ-NEXT:    GPR size: 32
+# CHECK-OBJ-NEXT:    CPR1 size: 32
+# CHECK-OBJ-NEXT:    CPR2 size: 0
+# CHECK-OBJ-NEXT:    Flags 1 [ (0x1)
+# CHECK-OBJ-NEXT:      ODDSPREG (0x1)
+# CHECK-OBJ-NEXT:    ]
+# CHECK-OBJ-NEXT:    Flags 2: 0x0
+# CHECK-OBJ-NEXT:  }
 
         .module fp=xx
 
diff --git a/test/MC/Mips/mips_abi_flags_xx_set.s b/test/MC/Mips/mips_abi_flags_xx_set.s
index a548972..b31e295 100644
--- a/test/MC/Mips/mips_abi_flags_xx_set.s
+++ b/test/MC/Mips/mips_abi_flags_xx_set.s
@@ -2,7 +2,7 @@
 # RUN:   FileCheck %s -check-prefix=CHECK-ASM
 #
 # RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
-# RUN:   llvm-readobj -sections -section-data -section-relocations - | \
+# RUN:   llvm-readobj -sections -section-data -section-relocations -mips-abi-flags - | \
 # RUN:     FileCheck %s -check-prefix=CHECK-OBJ
 
 # CHECK-ASM: .module fp=xx
@@ -24,11 +24,22 @@
 # CHECK-OBJ:         EntrySize: 24
 # CHECK-OBJ:         Relocations [
 # CHECK-OBJ:         ]
-# CHECK-OBJ:         SectionData (
-# CHECK-OBJ:           0000: 00002001 01010005 00000000 00000000  |.. .............|
-# CHECK-OBJ:           0010: 00000001 00000000                    |........|
-# CHECK-OBJ:         )
 # CHECK-OBJ-LABEL: }
+# CHECK-OBJ:       MIPS ABI Flags {
+# CHECK-OBJ-NEXT:    Version: 0
+# CHECK-OBJ-NEXT:    ISA: {{MIPS32$}}
+# CHECK-OBJ-NEXT:    ISA Extension: None (0x0)
+# CHECK-OBJ-NEXT:    ASEs [ (0x0)
+# CHECK-OBJ-NEXT:    ]
+# CHECK-OBJ-NEXT:    FP ABI: Hard float (32-bit CPU, Any FPU) (0x5)
+# CHECK-OBJ-NEXT:    GPR size: 32
+# CHECK-OBJ-NEXT:    CPR1 size: 32
+# CHECK-OBJ-NEXT:    CPR2 size: 0
+# CHECK-OBJ-NEXT:    Flags 1 [ (0x1)
+# CHECK-OBJ-NEXT:      ODDSPREG (0x1)
+# CHECK-OBJ-NEXT:    ]
+# CHECK-OBJ-NEXT:    Flags 2: 0x0
+# CHECK-OBJ-NEXT:  }
 
         .module fp=xx
         .set    fp=64
diff --git a/test/MC/Mips/module-hardfloat.s b/test/MC/Mips/module-hardfloat.s
new file mode 100644
index 0000000..51f7248
--- /dev/null
+++ b/test/MC/Mips/module-hardfloat.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
+# RUN:   llvm-readobj -mips-abi-flags - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module hardfloat
+
+# Check if the MIPS.abiflags section was correctly emitted:
+# CHECK-OBJ: MIPS ABI Flags {
+# CHECK-OBJ:   FP ABI: Hard float (32-bit CPU, Any FPU) (0x5)
+# CHECK-OBJ:   CPR1 size: 32
+# CHECK-OBJ:   Flags 1 [ (0x1)
+# CHECK-OBJ:     ODDSPREG (0x1)
+# CHECK-OBJ:   ]
+# CHECK-OBJ: }
+
+  .module fp=xx
+  .module oddspreg
+  .module softfloat
+  .module hardfloat
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/module-softfloat.s b/test/MC/Mips/module-softfloat.s
new file mode 100644
index 0000000..18559c5
--- /dev/null
+++ b/test/MC/Mips/module-softfloat.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 | \
+# RUN:   FileCheck %s -check-prefix=CHECK-ASM
+#
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -filetype=obj -o - | \
+# RUN:   llvm-readobj -mips-abi-flags - | \
+# RUN:     FileCheck %s -check-prefix=CHECK-OBJ
+
+# CHECK-ASM: .module softfloat
+
+# Check if the MIPS.abiflags section was correctly emitted:
+# CHECK-OBJ: MIPS ABI Flags {
+# CHECK-OBJ:   FP ABI: Soft float (0x3)
+# CHECK-OBJ:   CPR1 size: 0
+# CHECK-OBJ: }
+
+  .module softfloat
+
+# FIXME: Test should include gnu_attributes directive when implemented.
+#        An explicit .gnu_attribute must be checked against the effective
+#        command line options and any inconsistencies reported via a warning.
diff --git a/test/MC/Mips/relocation.s b/test/MC/Mips/relocation.s
index 3a5f5a9..f8030d1 100644
--- a/test/MC/Mips/relocation.s
+++ b/test/MC/Mips/relocation.s
@@ -171,12 +171,12 @@
         beqzc $2, foo                      // RELOC: R_MIPS_PC21_S2 foo
                                            // ENCBE: beqzc $2, foo # encoding: [0xd8,0b010AAAAA,A,A]
                                            // ENCLE: beqzc $2, foo # encoding: [A,A,0b010AAAAA,0xd8]
-                                           // FIXUP: # fixup A - offset: 0, value: foo, kind: fixup_MIPS_PC21_S2
+                                           // FIXUP: # fixup A - offset: 0, value: foo-4, kind: fixup_MIPS_PC21_S2
 
         bc foo                             // RELOC: R_MIPS_PC26_S2 foo
                                            // ENCBE: bc foo # encoding: [0b110010AA,A,A,A]
                                            // ENCLE: bc foo # encoding: [A,A,A,0b110010AA]
-                                           // FIXUP: # fixup A - offset: 0, value: foo, kind: fixup_MIPS_PC26_S2
+                                           // FIXUP: # fixup A - offset: 0, value: foo-4, kind: fixup_MIPS_PC26_S2
 
         .set mips64r6
         ldpc $2, foo                       // RELOC: R_MIPS_PC18_S3 foo
diff --git a/test/MC/Mips/set-nomacro.s b/test/MC/Mips/set-nomacro.s
index 00d6b21..3f82f81 100644
--- a/test/MC/Mips/set-nomacro.s
+++ b/test/MC/Mips/set-nomacro.s
@@ -60,6 +60,13 @@
   bgtu $0, $8, local_label
   bgtu $0, $0, local_label
 
+  ulhu $5, 0
+
+  ulw $8, 2
+  ulw $8, 0x8000
+  ulw $8, 2($9)
+  ulw $8, 0x8000($9)
+
   add $4, $5, $6
 
   .set noreorder
@@ -168,5 +175,17 @@
   bgtu $0, $0, local_label
 # CHECK-NOT: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
 
+  ulhu $5, 0
+# CHECK: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+
+  ulw $8, 2
+# CHECK-NOT: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+  ulw $8, 0x8000
+# CHECK: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+  ulw $8, 2($9)
+# CHECK-NOT: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+  ulw $8, 0x8000($9)
+# CHECK: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+
   add $4, $5, $6
 # CHECK-NOT: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
diff --git a/test/MC/Mips/set-oddspreg-nooddspreg-error.s b/test/MC/Mips/set-oddspreg-nooddspreg-error.s
new file mode 100644
index 0000000..5fb1308
--- /dev/null
+++ b/test/MC/Mips/set-oddspreg-nooddspreg-error.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32 -mattr=+nooddspreg 2>%t1
+# RUN: FileCheck %s < %t1
+
+  .set oddspreg
+  sub.s $f1, $f2, $f2
+  # CHECK-NOT: :[[@LINE-1]]:{{[0-9]+}}: error: -mno-odd-spreg prohibits the use of odd FPU registers
+
+  .set nooddspreg
+  sub.s $f1, $f2, $f2
+  # CHECK: :[[@LINE-1]]:9: error: -mno-odd-spreg prohibits the use of odd FPU registers
diff --git a/test/MC/Mips/set-oddspreg-nooddspreg.s b/test/MC/Mips/set-oddspreg-nooddspreg.s
new file mode 100644
index 0000000..a057c48
--- /dev/null
+++ b/test/MC/Mips/set-oddspreg-nooddspreg.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+nooddspreg | \
+# RUN:   FileCheck %s
+
+  .set oddspreg
+  sub.s $f1, $f2, $f2
+  .set nooddspreg
+
+# CHECK: .set oddspreg
+# CHECK: sub.s $f1, $f2, $f2
+# CHECK: .set nooddspreg
diff --git a/test/MC/Mips/update-module-level-options.s b/test/MC/Mips/update-module-level-options.s
new file mode 100644
index 0000000..3d6e97c
--- /dev/null
+++ b/test/MC/Mips/update-module-level-options.s
@@ -0,0 +1,14 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+fp64,-nooddspreg 2>&1 | \
+# RUN:   FileCheck %s
+
+  .module nooddspreg
+  add.s $f1, $f2, $f4
+# CHECK: :[[@LINE-1]]:9: error: -mno-odd-spreg prohibits the use of odd FPU registers
+
+  .set oddspreg
+  add.s $f1, $f2, $f4
+# CHECK-NOT: :[[@LINE-1]]:{{[0-9]+}}: error: -mno-odd-spreg prohibits the use of odd FPU registers
+
+  .set mips0
+  add.s $f1, $f2, $f4
+# CHECK: :[[@LINE-1]]:9: error: -mno-odd-spreg prohibits the use of odd FPU registers
diff --git a/test/MC/PowerPC/ppc64-encoding-vmx.s b/test/MC/PowerPC/ppc64-encoding-vmx.s
index 5c62d2a..d8825bf 100644
--- a/test/MC/PowerPC/ppc64-encoding-vmx.s
+++ b/test/MC/PowerPC/ppc64-encoding-vmx.s
@@ -1,5 +1,5 @@
 
-# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s 
 # RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
 
 # Vector facility
@@ -110,7 +110,13 @@
 # CHECK-BE: vmrglw 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x8c]
 # CHECK-LE: vmrglw 2, 3, 4                  # encoding: [0x8c,0x21,0x43,0x10]
             vmrglw 2, 3, 4
-
+# CHECK-BE: vmrgew 2, 3, 4                  # encoding: [0x10,0x43,0x27,0x8c]
+# CHECK-LE: vmrgew 2, 3, 4                  # encoding: [0x8c,0x27,0x43,0x10]
+            vmrgew 2, 3, 4
+# CHECK-BE: vmrgow 2, 3, 4                  # encoding: [0x10,0x43,0x26,0x8c]
+# CHECK-LE: vmrgow 2, 3, 4                  # encoding: [0x8c,0x26,0x43,0x10]
+            vmrgow 2, 3, 4
+	
 # CHECK-BE: vspltb 2, 3, 1                  # encoding: [0x10,0x41,0x1a,0x0c]
 # CHECK-LE: vspltb 2, 3, 1                  # encoding: [0x0c,0x1a,0x41,0x10]
             vspltb 2, 3, 1
diff --git a/test/MC/X86/AlignedBundling/misaligned-bundle-group.s b/test/MC/X86/AlignedBundling/misaligned-bundle-group.s
new file mode 100644
index 0000000..04b3374
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/misaligned-bundle-group.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
+
+        .text
+foo:
+        .bundle_align_mode 5
+        push    %ebp # 1 byte
+        .align  16
+        .bundle_lock align_to_end
+# CHECK:            1:  nopw %cs:(%eax,%eax)
+# CHECK:            10: nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      1f: nop
+# CHECK-RELAX:      20: nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      2f: nopw %cs:(%eax,%eax)
+# CHECK-OPT:        1b: calll -4
+# CHECK-RELAX:      3b: calll -4
+        calll   bar # 5 bytes
+        .bundle_unlock
+        ret         # 1 byte
diff --git a/test/MC/X86/AlignedBundling/misaligned-bundle.s b/test/MC/X86/AlignedBundling/misaligned-bundle.s
new file mode 100644
index 0000000..08d6161
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/misaligned-bundle.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s
+# RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mc-relax-all %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - \
+# RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s
+
+        .text
+foo:
+        .bundle_align_mode 5
+        push    %ebp          # 1 byte
+        .align  16
+# CHECK:            1:  nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      10: nopw %cs:(%eax,%eax)
+# CHECK-RELAX:      1f: nop
+# CHECK-OPT:        10: movl $1, (%esp)
+# CHECK-RELAX:      20: movl $1, (%esp)
+        movl $0x1, (%esp)     # 7 bytes
+        movl $0x1, (%esp)     # 7 bytes
+# CHECK-OPT:        1e: nop
+        movl $0x2, 0x1(%esp)  # 8 bytes
+        movl $0x2, 0x1(%esp)  # 8 bytes
+# CHECK-RELAX:      3e: nop
+# CHECK-RELAX:      40: movl $2, 1(%esp)
+        movl $0x2, 0x1(%esp)  # 8 bytes
+        movl $0x2, (%esp)     # 7 bytes
+# CHECK-OPT:        3f: nop
+# CHECK-OPT:        40: movl $3, (%esp)
+        movl $0x3, (%esp)     # 7 bytes
+        movl $0x3, (%esp)     # 7 bytes
+        ret
diff --git a/test/MC/X86/AlignedBundling/rodata-section.s b/test/MC/X86/AlignedBundling/rodata-section.s
new file mode 100644
index 0000000..21f2c73
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/rodata-section.s
@@ -0,0 +1,30 @@
+# RUN: llvm-mc -triple=i686-nacl -filetype=obj %s -o - \
+# RUN:    | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN: llvm-mc -triple=i686-nacl -filetype=obj -mc-relax-all %s -o - \
+# RUN:    | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+  .bundle_align_mode 5
+  .text
+  .align	32, 0x90
+# CHECK: 0: movl $14, 8(%esp)
+  movl	$.str2, 8(%esp)
+# CHECK: 8: movl $7, 4(%esp)
+  movl	$.str1, 4(%esp)
+# CHECK: 10: movl $0, (%esp)
+  movl	$.str, (%esp)
+
+  .type	.str,@object
+  .section	.rodata,"a",@progbits
+.str:
+  .asciz	"hello1"
+  .size	.str, 7
+
+  .type	.str1,@object
+.str1:
+  .asciz	"hello2"
+  .size	.str1, 7
+
+  .type	.str2,@object
+.str2:
+  .asciz	"hello3"
+  .size	.str2, 7
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index e52dfac..079cb885 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -9290,227 +9290,4675 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff]
           vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14
 
+// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0x92,0x5d,0x40,0x98,0xc9]
+          vfmadd132ps %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1}
+// CHECK:  encoding: [0x62,0x92,0x5d,0x41,0x98,0xc9]
+          vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1}
+
+// CHECK: vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} {z}
+// CHECK:  encoding: [0x62,0x92,0x5d,0xc1,0x98,0xc9]
+          vfmadd132ps %zmm25, %zmm20, %zmm1 {%k1} {z}
+
+// CHECK: vfmadd132ps {rn-sae}, %zmm25, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0x92,0x5d,0x10,0x98,0xc9]
+          vfmadd132ps {rn-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps {ru-sae}, %zmm25, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0x92,0x5d,0x50,0x98,0xc9]
+          vfmadd132ps {ru-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps {rd-sae}, %zmm25, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0x92,0x5d,0x30,0x98,0xc9]
+          vfmadd132ps {rd-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps {rz-sae}, %zmm25, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0x92,0x5d,0x70,0x98,0xc9]
+          vfmadd132ps {rz-sae}, %zmm25, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps (%rcx), %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x40,0x98,0x09]
+          vfmadd132ps (%rcx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 291(%rax,%r14,8), %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xb2,0x5d,0x40,0x98,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132ps 291(%rax,%r14,8), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps (%rcx){1to16}, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x50,0x98,0x09]
+          vfmadd132ps (%rcx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 8128(%rdx), %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x40,0x98,0x4a,0x7f]
+          vfmadd132ps 8128(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 8192(%rdx), %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x40,0x98,0x8a,0x00,0x20,0x00,0x00]
+          vfmadd132ps 8192(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -8192(%rdx), %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x40,0x98,0x4a,0x80]
+          vfmadd132ps -8192(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -8256(%rdx), %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x40,0x98,0x8a,0xc0,0xdf,0xff,0xff]
+          vfmadd132ps -8256(%rdx), %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 508(%rdx){1to16}, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x50,0x98,0x4a,0x7f]
+          vfmadd132ps 508(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps 512(%rdx){1to16}, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x50,0x98,0x8a,0x00,0x02,0x00,0x00]
+          vfmadd132ps 512(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -512(%rdx){1to16}, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x50,0x98,0x4a,0x80]
+          vfmadd132ps -512(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132ps -516(%rdx){1to16}, %zmm20, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x5d,0x50,0x98,0x8a,0xfc,0xfd,0xff,0xff]
+          vfmadd132ps -516(%rdx){1to16}, %zmm20, %zmm1
+
+// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xfd,0x40,0x98,0xd5]
+          vfmadd132pd %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x45,0x98,0xd5]
+          vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5}
+
+// CHECK: vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} {z}
+// CHECK:  encoding: [0x62,0x22,0xfd,0xc5,0x98,0xd5]
+          vfmadd132pd %zmm21, %zmm16, %zmm26 {%k5} {z}
+
+// CHECK: vfmadd132pd {rn-sae}, %zmm21, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xfd,0x10,0x98,0xd5]
+          vfmadd132pd {rn-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd {ru-sae}, %zmm21, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xfd,0x50,0x98,0xd5]
+          vfmadd132pd {ru-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd {rd-sae}, %zmm21, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xfd,0x30,0x98,0xd5]
+          vfmadd132pd {rd-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd {rz-sae}, %zmm21, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xfd,0x70,0x98,0xd5]
+          vfmadd132pd {rz-sae}, %zmm21, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd (%rcx), %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x40,0x98,0x11]
+          vfmadd132pd (%rcx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 291(%rax,%r14,8), %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xfd,0x40,0x98,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132pd 291(%rax,%r14,8), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd (%rcx){1to8}, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x50,0x98,0x11]
+          vfmadd132pd (%rcx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 8128(%rdx), %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x40,0x98,0x52,0x7f]
+          vfmadd132pd 8128(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 8192(%rdx), %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x40,0x98,0x92,0x00,0x20,0x00,0x00]
+          vfmadd132pd 8192(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -8192(%rdx), %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x40,0x98,0x52,0x80]
+          vfmadd132pd -8192(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -8256(%rdx), %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x40,0x98,0x92,0xc0,0xdf,0xff,0xff]
+          vfmadd132pd -8256(%rdx), %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 1016(%rdx){1to8}, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x50,0x98,0x52,0x7f]
+          vfmadd132pd 1016(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd 1024(%rdx){1to8}, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x50,0x98,0x92,0x00,0x04,0x00,0x00]
+          vfmadd132pd 1024(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -1024(%rdx){1to8}, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x50,0x98,0x52,0x80]
+          vfmadd132pd -1024(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd132pd -1032(%rdx){1to8}, %zmm16, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xfd,0x50,0x98,0x92,0xf8,0xfb,0xff,0xff]
+          vfmadd132pd -1032(%rdx){1to8}, %zmm16, %zmm26
+
+// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xc2,0x65,0x40,0xa8,0xe6]
+          vfmadd213ps %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4}
+// CHECK:  encoding: [0x62,0xc2,0x65,0x44,0xa8,0xe6]
+          vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4}
+
+// CHECK: vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} {z}
+// CHECK:  encoding: [0x62,0xc2,0x65,0xc4,0xa8,0xe6]
+          vfmadd213ps %zmm14, %zmm19, %zmm20 {%k4} {z}
+
+// CHECK: vfmadd213ps {rn-sae}, %zmm14, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xc2,0x65,0x10,0xa8,0xe6]
+          vfmadd213ps {rn-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps {ru-sae}, %zmm14, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xc2,0x65,0x50,0xa8,0xe6]
+          vfmadd213ps {ru-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps {rd-sae}, %zmm14, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xc2,0x65,0x30,0xa8,0xe6]
+          vfmadd213ps {rd-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps {rz-sae}, %zmm14, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xc2,0x65,0x70,0xa8,0xe6]
+          vfmadd213ps {rz-sae}, %zmm14, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps (%rcx), %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x40,0xa8,0x21]
+          vfmadd213ps (%rcx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 291(%rax,%r14,8), %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0x65,0x40,0xa8,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213ps 291(%rax,%r14,8), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps (%rcx){1to16}, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x50,0xa8,0x21]
+          vfmadd213ps (%rcx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 8128(%rdx), %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x40,0xa8,0x62,0x7f]
+          vfmadd213ps 8128(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 8192(%rdx), %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x40,0xa8,0xa2,0x00,0x20,0x00,0x00]
+          vfmadd213ps 8192(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -8192(%rdx), %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x40,0xa8,0x62,0x80]
+          vfmadd213ps -8192(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -8256(%rdx), %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x40,0xa8,0xa2,0xc0,0xdf,0xff,0xff]
+          vfmadd213ps -8256(%rdx), %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 508(%rdx){1to16}, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x50,0xa8,0x62,0x7f]
+          vfmadd213ps 508(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps 512(%rdx){1to16}, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x50,0xa8,0xa2,0x00,0x02,0x00,0x00]
+          vfmadd213ps 512(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -512(%rdx){1to16}, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x50,0xa8,0x62,0x80]
+          vfmadd213ps -512(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213ps -516(%rdx){1to16}, %zmm19, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x65,0x50,0xa8,0xa2,0xfc,0xfd,0xff,0xff]
+          vfmadd213ps -516(%rdx){1to16}, %zmm19, %zmm20
+
+// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0x82,0xfd,0x40,0xa8,0xd1]
+          vfmadd213pd %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x43,0xa8,0xd1]
+          vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3}
+
+// CHECK: vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xc3,0xa8,0xd1]
+          vfmadd213pd %zmm25, %zmm16, %zmm18 {%k3} {z}
+
+// CHECK: vfmadd213pd {rn-sae}, %zmm25, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0x82,0xfd,0x10,0xa8,0xd1]
+          vfmadd213pd {rn-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd {ru-sae}, %zmm25, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0x82,0xfd,0x50,0xa8,0xd1]
+          vfmadd213pd {ru-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd {rd-sae}, %zmm25, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0x82,0xfd,0x30,0xa8,0xd1]
+          vfmadd213pd {rd-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd {rz-sae}, %zmm25, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0x82,0xfd,0x70,0xa8,0xd1]
+          vfmadd213pd {rz-sae}, %zmm25, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd (%rcx), %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x11]
+          vfmadd213pd (%rcx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 291(%rax,%r14,8), %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x40,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213pd 291(%rax,%r14,8), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd (%rcx){1to8}, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x11]
+          vfmadd213pd (%rcx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 8128(%rdx), %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x52,0x7f]
+          vfmadd213pd 8128(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 8192(%rdx), %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x92,0x00,0x20,0x00,0x00]
+          vfmadd213pd 8192(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -8192(%rdx), %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x52,0x80]
+          vfmadd213pd -8192(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -8256(%rdx), %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xa8,0x92,0xc0,0xdf,0xff,0xff]
+          vfmadd213pd -8256(%rdx), %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x52,0x7f]
+          vfmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x92,0x00,0x04,0x00,0x00]
+          vfmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x52,0x80]
+          vfmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xa8,0x92,0xf8,0xfb,0xff,0xff]
+          vfmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm18
+
+// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x02,0x1d,0x40,0xb8,0xd9]
+          vfmadd231ps %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3}
+// CHECK:  encoding: [0x62,0x02,0x1d,0x43,0xb8,0xd9]
+          vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3}
+
+// CHECK: vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0x1d,0xc3,0xb8,0xd9]
+          vfmadd231ps %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x02,0x1d,0x10,0xb8,0xd9]
+          vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps {ru-sae}, %zmm25, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x02,0x1d,0x50,0xb8,0xd9]
+          vfmadd231ps {ru-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps {rd-sae}, %zmm25, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x02,0x1d,0x30,0xb8,0xd9]
+          vfmadd231ps {rd-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps {rz-sae}, %zmm25, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x02,0x1d,0x70,0xb8,0xd9]
+          vfmadd231ps {rz-sae}, %zmm25, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps (%rcx), %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x40,0xb8,0x19]
+          vfmadd231ps (%rcx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 291(%rax,%r14,8), %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x22,0x1d,0x40,0xb8,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231ps 291(%rax,%r14,8), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps (%rcx){1to16}, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x50,0xb8,0x19]
+          vfmadd231ps (%rcx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 8128(%rdx), %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x40,0xb8,0x5a,0x7f]
+          vfmadd231ps 8128(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 8192(%rdx), %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x40,0xb8,0x9a,0x00,0x20,0x00,0x00]
+          vfmadd231ps 8192(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -8192(%rdx), %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x40,0xb8,0x5a,0x80]
+          vfmadd231ps -8192(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -8256(%rdx), %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x40,0xb8,0x9a,0xc0,0xdf,0xff,0xff]
+          vfmadd231ps -8256(%rdx), %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 508(%rdx){1to16}, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x50,0xb8,0x5a,0x7f]
+          vfmadd231ps 508(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps 512(%rdx){1to16}, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x50,0xb8,0x9a,0x00,0x02,0x00,0x00]
+          vfmadd231ps 512(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -512(%rdx){1to16}, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x50,0xb8,0x5a,0x80]
+          vfmadd231ps -512(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231ps -516(%rdx){1to16}, %zmm28, %zmm27
+// CHECK:  encoding: [0x62,0x62,0x1d,0x50,0xb8,0x9a,0xfc,0xfd,0xff,0xff]
+          vfmadd231ps -516(%rdx){1to16}, %zmm28, %zmm27
+
+// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x42,0xcd,0x48,0xb8,0xf1]
+          vfmadd231pd %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4}
+// CHECK:  encoding: [0x62,0x42,0xcd,0x4c,0xb8,0xf1]
+          vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4}
+
+// CHECK: vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} {z}
+// CHECK:  encoding: [0x62,0x42,0xcd,0xcc,0xb8,0xf1]
+          vfmadd231pd %zmm9, %zmm6, %zmm30 {%k4} {z}
+
+// CHECK: vfmadd231pd {rn-sae}, %zmm9, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x42,0xcd,0x18,0xb8,0xf1]
+          vfmadd231pd {rn-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd {ru-sae}, %zmm9, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x42,0xcd,0x58,0xb8,0xf1]
+          vfmadd231pd {ru-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd {rd-sae}, %zmm9, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x42,0xcd,0x38,0xb8,0xf1]
+          vfmadd231pd {rd-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd {rz-sae}, %zmm9, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x42,0xcd,0x78,0xb8,0xf1]
+          vfmadd231pd {rz-sae}, %zmm9, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd (%rcx), %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xb8,0x31]
+          vfmadd231pd (%rcx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 291(%rax,%r14,8), %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x22,0xcd,0x48,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231pd 291(%rax,%r14,8), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd (%rcx){1to8}, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xb8,0x31]
+          vfmadd231pd (%rcx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 8128(%rdx), %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xb8,0x72,0x7f]
+          vfmadd231pd 8128(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 8192(%rdx), %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xb8,0xb2,0x00,0x20,0x00,0x00]
+          vfmadd231pd 8192(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -8192(%rdx), %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xb8,0x72,0x80]
+          vfmadd231pd -8192(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -8256(%rdx), %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xb8,0xb2,0xc0,0xdf,0xff,0xff]
+          vfmadd231pd -8256(%rdx), %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 1016(%rdx){1to8}, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xb8,0x72,0x7f]
+          vfmadd231pd 1016(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd 1024(%rdx){1to8}, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xb8,0xb2,0x00,0x04,0x00,0x00]
+          vfmadd231pd 1024(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -1024(%rdx){1to8}, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xb8,0x72,0x80]
+          vfmadd231pd -1024(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmadd231pd -1032(%rdx){1to8}, %zmm6, %zmm30
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xb8,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmadd231pd -1032(%rdx){1to8}, %zmm6, %zmm30
+
+// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xb2,0x15,0x48,0x9a,0xc8]
+          vfmsub132ps %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4}
+// CHECK:  encoding: [0x62,0xb2,0x15,0x4c,0x9a,0xc8]
+          vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4}
+
+// CHECK: vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} {z}
+// CHECK:  encoding: [0x62,0xb2,0x15,0xcc,0x9a,0xc8]
+          vfmsub132ps %zmm16, %zmm13, %zmm1 {%k4} {z}
+
+// CHECK: vfmsub132ps {rn-sae}, %zmm16, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xb2,0x15,0x18,0x9a,0xc8]
+          vfmsub132ps {rn-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps {ru-sae}, %zmm16, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xb2,0x15,0x58,0x9a,0xc8]
+          vfmsub132ps {ru-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps {rd-sae}, %zmm16, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xb2,0x15,0x38,0x9a,0xc8]
+          vfmsub132ps {rd-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps {rz-sae}, %zmm16, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xb2,0x15,0x78,0x9a,0xc8]
+          vfmsub132ps {rz-sae}, %zmm16, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps (%rcx), %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x9a,0x09]
+          vfmsub132ps (%rcx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 291(%rax,%r14,8), %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xb2,0x15,0x48,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132ps 291(%rax,%r14,8), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps (%rcx){1to16}, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x9a,0x09]
+          vfmsub132ps (%rcx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 8128(%rdx), %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x9a,0x4a,0x7f]
+          vfmsub132ps 8128(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 8192(%rdx), %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x9a,0x8a,0x00,0x20,0x00,0x00]
+          vfmsub132ps 8192(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -8192(%rdx), %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x9a,0x4a,0x80]
+          vfmsub132ps -8192(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -8256(%rdx), %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x9a,0x8a,0xc0,0xdf,0xff,0xff]
+          vfmsub132ps -8256(%rdx), %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 508(%rdx){1to16}, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x9a,0x4a,0x7f]
+          vfmsub132ps 508(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps 512(%rdx){1to16}, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x9a,0x8a,0x00,0x02,0x00,0x00]
+          vfmsub132ps 512(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -512(%rdx){1to16}, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x9a,0x4a,0x80]
+          vfmsub132ps -512(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132ps -516(%rdx){1to16}, %zmm13, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x9a,0x8a,0xfc,0xfd,0xff,0xff]
+          vfmsub132ps -516(%rdx){1to16}, %zmm13, %zmm1
+
+// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0x82,0x9d,0x48,0x9a,0xf3]
+          vfmsub132pd %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x9d,0x4a,0x9a,0xf3]
+          vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2}
+
+// CHECK: vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x9d,0xca,0x9a,0xf3]
+          vfmsub132pd %zmm27, %zmm12, %zmm22 {%k2} {z}
+
+// CHECK: vfmsub132pd {rn-sae}, %zmm27, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0x82,0x9d,0x18,0x9a,0xf3]
+          vfmsub132pd {rn-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd {ru-sae}, %zmm27, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0x82,0x9d,0x58,0x9a,0xf3]
+          vfmsub132pd {ru-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd {rd-sae}, %zmm27, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0x82,0x9d,0x38,0x9a,0xf3]
+          vfmsub132pd {rd-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd {rz-sae}, %zmm27, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0x82,0x9d,0x78,0x9a,0xf3]
+          vfmsub132pd {rz-sae}, %zmm27, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd (%rcx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x31]
+          vfmsub132pd (%rcx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 291(%rax,%r14,8), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x48,0x9a,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132pd 291(%rax,%r14,8), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd (%rcx){1to8}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x31]
+          vfmsub132pd (%rcx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 8128(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x72,0x7f]
+          vfmsub132pd 8128(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 8192(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x48,0x9a,0xb2,0x00,0x20,0x00,0x00]
+          vfmsub132pd 8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -8192(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x48,0x9a,0x72,0x80]
+          vfmsub132pd -8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -8256(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x48,0x9a,0xb2,0xc0,0xdf,0xff,0xff]
+          vfmsub132pd -8256(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 1016(%rdx){1to8}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x72,0x7f]
+          vfmsub132pd 1016(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd 1024(%rdx){1to8}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x58,0x9a,0xb2,0x00,0x04,0x00,0x00]
+          vfmsub132pd 1024(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -1024(%rdx){1to8}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x58,0x9a,0x72,0x80]
+          vfmsub132pd -1024(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub132pd -1032(%rdx){1to8}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x58,0x9a,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmsub132pd -1032(%rdx){1to8}, %zmm12, %zmm22
+
+// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x4d,0x40,0xaa,0xf2]
+          vfmsub213ps %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6}
+// CHECK:  encoding: [0x62,0xc2,0x4d,0x46,0xaa,0xf2]
+          vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6}
+
+// CHECK: vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} {z}
+// CHECK:  encoding: [0x62,0xc2,0x4d,0xc6,0xaa,0xf2]
+          vfmsub213ps %zmm10, %zmm22, %zmm22 {%k6} {z}
+
+// CHECK: vfmsub213ps {rn-sae}, %zmm10, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x4d,0x10,0xaa,0xf2]
+          vfmsub213ps {rn-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps {ru-sae}, %zmm10, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x4d,0x50,0xaa,0xf2]
+          vfmsub213ps {ru-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps {rd-sae}, %zmm10, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x4d,0x30,0xaa,0xf2]
+          vfmsub213ps {rd-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps {rz-sae}, %zmm10, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x4d,0x70,0xaa,0xf2]
+          vfmsub213ps {rz-sae}, %zmm10, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps (%rcx), %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x31]
+          vfmsub213ps (%rcx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 291(%rax,%r14,8), %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x40,0xaa,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213ps 291(%rax,%r14,8), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps (%rcx){1to16}, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x31]
+          vfmsub213ps (%rcx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 8128(%rdx), %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x72,0x7f]
+          vfmsub213ps 8128(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 8192(%rdx), %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x40,0xaa,0xb2,0x00,0x20,0x00,0x00]
+          vfmsub213ps 8192(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -8192(%rdx), %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x40,0xaa,0x72,0x80]
+          vfmsub213ps -8192(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -8256(%rdx), %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x40,0xaa,0xb2,0xc0,0xdf,0xff,0xff]
+          vfmsub213ps -8256(%rdx), %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 508(%rdx){1to16}, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x72,0x7f]
+          vfmsub213ps 508(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps 512(%rdx){1to16}, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x50,0xaa,0xb2,0x00,0x02,0x00,0x00]
+          vfmsub213ps 512(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -512(%rdx){1to16}, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x50,0xaa,0x72,0x80]
+          vfmsub213ps -512(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213ps -516(%rdx){1to16}, %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x50,0xaa,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmsub213ps -516(%rdx){1to16}, %zmm22, %zmm22
+
+// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x48,0xaa,0xec]
+          vfmsub213pd %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1}
+// CHECK:  encoding: [0x62,0xf2,0xad,0x49,0xaa,0xec]
+          vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1}
+
+// CHECK: vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} {z}
+// CHECK:  encoding: [0x62,0xf2,0xad,0xc9,0xaa,0xec]
+          vfmsub213pd %zmm4, %zmm10, %zmm5 {%k1} {z}
+
+// CHECK: vfmsub213pd {rn-sae}, %zmm4, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x18,0xaa,0xec]
+          vfmsub213pd {rn-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd {ru-sae}, %zmm4, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x58,0xaa,0xec]
+          vfmsub213pd {ru-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd {rd-sae}, %zmm4, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x38,0xaa,0xec]
+          vfmsub213pd {rd-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd {rz-sae}, %zmm4, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x78,0xaa,0xec]
+          vfmsub213pd {rz-sae}, %zmm4, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd (%rcx), %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x48,0xaa,0x29]
+          vfmsub213pd (%rcx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 291(%rax,%r14,8), %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xb2,0xad,0x48,0xaa,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213pd 291(%rax,%r14,8), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd (%rcx){1to8}, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x58,0xaa,0x29]
+          vfmsub213pd (%rcx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 8128(%rdx), %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x48,0xaa,0x6a,0x7f]
+          vfmsub213pd 8128(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 8192(%rdx), %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x48,0xaa,0xaa,0x00,0x20,0x00,0x00]
+          vfmsub213pd 8192(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -8192(%rdx), %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x48,0xaa,0x6a,0x80]
+          vfmsub213pd -8192(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -8256(%rdx), %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x48,0xaa,0xaa,0xc0,0xdf,0xff,0xff]
+          vfmsub213pd -8256(%rdx), %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 1016(%rdx){1to8}, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x58,0xaa,0x6a,0x7f]
+          vfmsub213pd 1016(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd 1024(%rdx){1to8}, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x58,0xaa,0xaa,0x00,0x04,0x00,0x00]
+          vfmsub213pd 1024(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -1024(%rdx){1to8}, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x58,0xaa,0x6a,0x80]
+          vfmsub213pd -1024(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub213pd -1032(%rdx){1to8}, %zmm10, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xad,0x58,0xaa,0xaa,0xf8,0xfb,0xff,0xff]
+          vfmsub213pd -1032(%rdx){1to8}, %zmm10, %zmm5
+
+// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0x92,0x55,0x40,0xba,0xf3]
+          vfmsub231ps %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3}
+// CHECK:  encoding: [0x62,0x92,0x55,0x43,0xba,0xf3]
+          vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3}
+
+// CHECK: vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} {z}
+// CHECK:  encoding: [0x62,0x92,0x55,0xc3,0xba,0xf3]
+          vfmsub231ps %zmm27, %zmm21, %zmm6 {%k3} {z}
+
+// CHECK: vfmsub231ps {rn-sae}, %zmm27, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0x92,0x55,0x10,0xba,0xf3]
+          vfmsub231ps {rn-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps {ru-sae}, %zmm27, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0x92,0x55,0x50,0xba,0xf3]
+          vfmsub231ps {ru-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps {rd-sae}, %zmm27, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0x92,0x55,0x30,0xba,0xf3]
+          vfmsub231ps {rd-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps {rz-sae}, %zmm27, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0x92,0x55,0x70,0xba,0xf3]
+          vfmsub231ps {rz-sae}, %zmm27, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps (%rcx), %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x40,0xba,0x31]
+          vfmsub231ps (%rcx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 291(%rax,%r14,8), %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xb2,0x55,0x40,0xba,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231ps 291(%rax,%r14,8), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps (%rcx){1to16}, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x50,0xba,0x31]
+          vfmsub231ps (%rcx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 8128(%rdx), %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x40,0xba,0x72,0x7f]
+          vfmsub231ps 8128(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 8192(%rdx), %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x40,0xba,0xb2,0x00,0x20,0x00,0x00]
+          vfmsub231ps 8192(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -8192(%rdx), %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x40,0xba,0x72,0x80]
+          vfmsub231ps -8192(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -8256(%rdx), %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x40,0xba,0xb2,0xc0,0xdf,0xff,0xff]
+          vfmsub231ps -8256(%rdx), %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 508(%rdx){1to16}, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x50,0xba,0x72,0x7f]
+          vfmsub231ps 508(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps 512(%rdx){1to16}, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x50,0xba,0xb2,0x00,0x02,0x00,0x00]
+          vfmsub231ps 512(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -512(%rdx){1to16}, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x50,0xba,0x72,0x80]
+          vfmsub231ps -512(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231ps -516(%rdx){1to16}, %zmm21, %zmm6
+// CHECK:  encoding: [0x62,0xf2,0x55,0x50,0xba,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmsub231ps -516(%rdx){1to16}, %zmm21, %zmm6
+
+// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xd2,0x9d,0x48,0xba,0xeb]
+          vfmsub231pd %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2}
+// CHECK:  encoding: [0x62,0xd2,0x9d,0x4a,0xba,0xeb]
+          vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2}
+
+// CHECK: vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} {z}
+// CHECK:  encoding: [0x62,0xd2,0x9d,0xca,0xba,0xeb]
+          vfmsub231pd %zmm11, %zmm12, %zmm5 {%k2} {z}
+
+// CHECK: vfmsub231pd {rn-sae}, %zmm11, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xd2,0x9d,0x18,0xba,0xeb]
+          vfmsub231pd {rn-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd {ru-sae}, %zmm11, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xd2,0x9d,0x58,0xba,0xeb]
+          vfmsub231pd {ru-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd {rd-sae}, %zmm11, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xd2,0x9d,0x38,0xba,0xeb]
+          vfmsub231pd {rd-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd {rz-sae}, %zmm11, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xd2,0x9d,0x78,0xba,0xeb]
+          vfmsub231pd {rz-sae}, %zmm11, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd (%rcx), %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x48,0xba,0x29]
+          vfmsub231pd (%rcx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 291(%rax,%r14,8), %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xb2,0x9d,0x48,0xba,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231pd 291(%rax,%r14,8), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd (%rcx){1to8}, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x58,0xba,0x29]
+          vfmsub231pd (%rcx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 8128(%rdx), %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x48,0xba,0x6a,0x7f]
+          vfmsub231pd 8128(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 8192(%rdx), %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x48,0xba,0xaa,0x00,0x20,0x00,0x00]
+          vfmsub231pd 8192(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -8192(%rdx), %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x48,0xba,0x6a,0x80]
+          vfmsub231pd -8192(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -8256(%rdx), %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x48,0xba,0xaa,0xc0,0xdf,0xff,0xff]
+          vfmsub231pd -8256(%rdx), %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 1016(%rdx){1to8}, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x58,0xba,0x6a,0x7f]
+          vfmsub231pd 1016(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd 1024(%rdx){1to8}, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x58,0xba,0xaa,0x00,0x04,0x00,0x00]
+          vfmsub231pd 1024(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -1024(%rdx){1to8}, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x58,0xba,0x6a,0x80]
+          vfmsub231pd -1024(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmsub231pd -1032(%rdx){1to8}, %zmm12, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0x9d,0x58,0xba,0xaa,0xf8,0xfb,0xff,0xff]
+          vfmsub231pd -1032(%rdx){1to8}, %zmm12, %zmm5
+
+// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x35,0x48,0x96,0xd4]
+          vfmaddsub132ps %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3}
+// CHECK:  encoding: [0x62,0x32,0x35,0x4b,0x96,0xd4]
+          vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3}
+
+// CHECK: vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} {z}
+// CHECK:  encoding: [0x62,0x32,0x35,0xcb,0x96,0xd4]
+          vfmaddsub132ps %zmm20, %zmm9, %zmm10 {%k3} {z}
+
+// CHECK: vfmaddsub132ps {rn-sae}, %zmm20, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x35,0x18,0x96,0xd4]
+          vfmaddsub132ps {rn-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps {ru-sae}, %zmm20, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x35,0x58,0x96,0xd4]
+          vfmaddsub132ps {ru-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps {rd-sae}, %zmm20, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x35,0x38,0x96,0xd4]
+          vfmaddsub132ps {rd-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps {rz-sae}, %zmm20, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x35,0x78,0x96,0xd4]
+          vfmaddsub132ps {rz-sae}, %zmm20, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps (%rcx), %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x48,0x96,0x11]
+          vfmaddsub132ps (%rcx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x35,0x48,0x96,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub132ps 291(%rax,%r14,8), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps (%rcx){1to16}, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x58,0x96,0x11]
+          vfmaddsub132ps (%rcx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 8128(%rdx), %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x48,0x96,0x52,0x7f]
+          vfmaddsub132ps 8128(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 8192(%rdx), %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x48,0x96,0x92,0x00,0x20,0x00,0x00]
+          vfmaddsub132ps 8192(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -8192(%rdx), %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x48,0x96,0x52,0x80]
+          vfmaddsub132ps -8192(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -8256(%rdx), %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x48,0x96,0x92,0xc0,0xdf,0xff,0xff]
+          vfmaddsub132ps -8256(%rdx), %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 508(%rdx){1to16}, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x58,0x96,0x52,0x7f]
+          vfmaddsub132ps 508(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps 512(%rdx){1to16}, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x58,0x96,0x92,0x00,0x02,0x00,0x00]
+          vfmaddsub132ps 512(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -512(%rdx){1to16}, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x58,0x96,0x52,0x80]
+          vfmaddsub132ps -512(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132ps -516(%rdx){1to16}, %zmm9, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x35,0x58,0x96,0x92,0xfc,0xfd,0xff,0xff]
+          vfmaddsub132ps -516(%rdx){1to16}, %zmm9, %zmm10
+
+// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x40,0x96,0xe5]
+          vfmaddsub132pd %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x42,0x96,0xe5]
+          vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2}
+
+// CHECK: vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0xb5,0xc2,0x96,0xe5]
+          vfmaddsub132pd %zmm21, %zmm25, %zmm20 {%k2} {z}
+
+// CHECK: vfmaddsub132pd {rn-sae}, %zmm21, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x10,0x96,0xe5]
+          vfmaddsub132pd {rn-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd {ru-sae}, %zmm21, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x50,0x96,0xe5]
+          vfmaddsub132pd {ru-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd {rd-sae}, %zmm21, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x30,0x96,0xe5]
+          vfmaddsub132pd {rd-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd {rz-sae}, %zmm21, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x70,0x96,0xe5]
+          vfmaddsub132pd {rz-sae}, %zmm21, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd (%rcx), %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x96,0x21]
+          vfmaddsub132pd (%rcx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x40,0x96,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub132pd 291(%rax,%r14,8), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd (%rcx){1to8}, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x50,0x96,0x21]
+          vfmaddsub132pd (%rcx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 8128(%rdx), %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x96,0x62,0x7f]
+          vfmaddsub132pd 8128(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 8192(%rdx), %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x96,0xa2,0x00,0x20,0x00,0x00]
+          vfmaddsub132pd 8192(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -8192(%rdx), %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x96,0x62,0x80]
+          vfmaddsub132pd -8192(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -8256(%rdx), %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x96,0xa2,0xc0,0xdf,0xff,0xff]
+          vfmaddsub132pd -8256(%rdx), %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 1016(%rdx){1to8}, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x50,0x96,0x62,0x7f]
+          vfmaddsub132pd 1016(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd 1024(%rdx){1to8}, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x50,0x96,0xa2,0x00,0x04,0x00,0x00]
+          vfmaddsub132pd 1024(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -1024(%rdx){1to8}, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x50,0x96,0x62,0x80]
+          vfmaddsub132pd -1024(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub132pd -1032(%rdx){1to8}, %zmm25, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x50,0x96,0xa2,0xf8,0xfb,0xff,0xff]
+          vfmaddsub132pd -1032(%rdx){1to8}, %zmm25, %zmm20
+
+// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x3d,0x40,0xa6,0xcc]
+          vfmaddsub213ps %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6}
+// CHECK:  encoding: [0x62,0x82,0x3d,0x46,0xa6,0xcc]
+          vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6}
+
+// CHECK: vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0x82,0x3d,0xc6,0xa6,0xcc]
+          vfmaddsub213ps %zmm28, %zmm24, %zmm17 {%k6} {z}
+
+// CHECK: vfmaddsub213ps {rn-sae}, %zmm28, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x3d,0x10,0xa6,0xcc]
+          vfmaddsub213ps {rn-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps {ru-sae}, %zmm28, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x3d,0x50,0xa6,0xcc]
+          vfmaddsub213ps {ru-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps {rd-sae}, %zmm28, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x3d,0x30,0xa6,0xcc]
+          vfmaddsub213ps {rd-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps {rz-sae}, %zmm28, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x3d,0x70,0xa6,0xcc]
+          vfmaddsub213ps {rz-sae}, %zmm28, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps (%rcx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x09]
+          vfmaddsub213ps (%rcx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x40,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub213ps 291(%rax,%r14,8), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps (%rcx){1to16}, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x09]
+          vfmaddsub213ps (%rcx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 8128(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x4a,0x7f]
+          vfmaddsub213ps 8128(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 8192(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x8a,0x00,0x20,0x00,0x00]
+          vfmaddsub213ps 8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -8192(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x4a,0x80]
+          vfmaddsub213ps -8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -8256(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x40,0xa6,0x8a,0xc0,0xdf,0xff,0xff]
+          vfmaddsub213ps -8256(%rdx), %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 508(%rdx){1to16}, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x4a,0x7f]
+          vfmaddsub213ps 508(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps 512(%rdx){1to16}, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x8a,0x00,0x02,0x00,0x00]
+          vfmaddsub213ps 512(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -512(%rdx){1to16}, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x4a,0x80]
+          vfmaddsub213ps -512(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213ps -516(%rdx){1to16}, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x50,0xa6,0x8a,0xfc,0xfd,0xff,0xff]
+          vfmaddsub213ps -516(%rdx){1to16}, %zmm24, %zmm17
+
+// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x42,0xcd,0x48,0xa6,0xd2]
+          vfmaddsub213pd %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6}
+// CHECK:  encoding: [0x62,0x42,0xcd,0x4e,0xa6,0xd2]
+          vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6}
+
+// CHECK: vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} {z}
+// CHECK:  encoding: [0x62,0x42,0xcd,0xce,0xa6,0xd2]
+          vfmaddsub213pd %zmm10, %zmm6, %zmm26 {%k6} {z}
+
+// CHECK: vfmaddsub213pd {rn-sae}, %zmm10, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x42,0xcd,0x18,0xa6,0xd2]
+          vfmaddsub213pd {rn-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd {ru-sae}, %zmm10, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x42,0xcd,0x58,0xa6,0xd2]
+          vfmaddsub213pd {ru-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd {rd-sae}, %zmm10, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x42,0xcd,0x38,0xa6,0xd2]
+          vfmaddsub213pd {rd-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd {rz-sae}, %zmm10, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x42,0xcd,0x78,0xa6,0xd2]
+          vfmaddsub213pd {rz-sae}, %zmm10, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd (%rcx), %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xa6,0x11]
+          vfmaddsub213pd (%rcx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xcd,0x48,0xa6,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub213pd 291(%rax,%r14,8), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd (%rcx){1to8}, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xa6,0x11]
+          vfmaddsub213pd (%rcx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 8128(%rdx), %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xa6,0x52,0x7f]
+          vfmaddsub213pd 8128(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 8192(%rdx), %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xa6,0x92,0x00,0x20,0x00,0x00]
+          vfmaddsub213pd 8192(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -8192(%rdx), %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xa6,0x52,0x80]
+          vfmaddsub213pd -8192(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -8256(%rdx), %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x48,0xa6,0x92,0xc0,0xdf,0xff,0xff]
+          vfmaddsub213pd -8256(%rdx), %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 1016(%rdx){1to8}, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xa6,0x52,0x7f]
+          vfmaddsub213pd 1016(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd 1024(%rdx){1to8}, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xa6,0x92,0x00,0x04,0x00,0x00]
+          vfmaddsub213pd 1024(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -1024(%rdx){1to8}, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xa6,0x52,0x80]
+          vfmaddsub213pd -1024(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub213pd -1032(%rdx){1to8}, %zmm6, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xcd,0x58,0xa6,0x92,0xf8,0xfb,0xff,0xff]
+          vfmaddsub213pd -1032(%rdx){1to8}, %zmm6, %zmm26
+
+// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x32,0x55,0x40,0xb6,0xfb]
+          vfmaddsub231ps %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6}
+// CHECK:  encoding: [0x62,0x32,0x55,0x46,0xb6,0xfb]
+          vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6}
+
+// CHECK: vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} {z}
+// CHECK:  encoding: [0x62,0x32,0x55,0xc6,0xb6,0xfb]
+          vfmaddsub231ps %zmm19, %zmm21, %zmm15 {%k6} {z}
+
+// CHECK: vfmaddsub231ps {rn-sae}, %zmm19, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x32,0x55,0x10,0xb6,0xfb]
+          vfmaddsub231ps {rn-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps {ru-sae}, %zmm19, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x32,0x55,0x50,0xb6,0xfb]
+          vfmaddsub231ps {ru-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps {rd-sae}, %zmm19, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x32,0x55,0x30,0xb6,0xfb]
+          vfmaddsub231ps {rd-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps {rz-sae}, %zmm19, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x32,0x55,0x70,0xb6,0xfb]
+          vfmaddsub231ps {rz-sae}, %zmm19, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps (%rcx), %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x40,0xb6,0x39]
+          vfmaddsub231ps (%rcx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x32,0x55,0x40,0xb6,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub231ps 291(%rax,%r14,8), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps (%rcx){1to16}, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x50,0xb6,0x39]
+          vfmaddsub231ps (%rcx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 8128(%rdx), %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x40,0xb6,0x7a,0x7f]
+          vfmaddsub231ps 8128(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 8192(%rdx), %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x40,0xb6,0xba,0x00,0x20,0x00,0x00]
+          vfmaddsub231ps 8192(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -8192(%rdx), %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x40,0xb6,0x7a,0x80]
+          vfmaddsub231ps -8192(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -8256(%rdx), %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x40,0xb6,0xba,0xc0,0xdf,0xff,0xff]
+          vfmaddsub231ps -8256(%rdx), %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 508(%rdx){1to16}, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x50,0xb6,0x7a,0x7f]
+          vfmaddsub231ps 508(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps 512(%rdx){1to16}, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x50,0xb6,0xba,0x00,0x02,0x00,0x00]
+          vfmaddsub231ps 512(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -512(%rdx){1to16}, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x50,0xb6,0x7a,0x80]
+          vfmaddsub231ps -512(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231ps -516(%rdx){1to16}, %zmm21, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x55,0x50,0xb6,0xba,0xfc,0xfd,0xff,0xff]
+          vfmaddsub231ps -516(%rdx){1to16}, %zmm21, %zmm15
+
+// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x12,0xa5,0x40,0xb6,0xc8]
+          vfmaddsub231pd %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7}
+// CHECK:  encoding: [0x62,0x12,0xa5,0x47,0xb6,0xc8]
+          vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7}
+
+// CHECK: vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} {z}
+// CHECK:  encoding: [0x62,0x12,0xa5,0xc7,0xb6,0xc8]
+          vfmaddsub231pd %zmm24, %zmm27, %zmm9 {%k7} {z}
+
+// CHECK: vfmaddsub231pd {rn-sae}, %zmm24, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x12,0xa5,0x10,0xb6,0xc8]
+          vfmaddsub231pd {rn-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd {ru-sae}, %zmm24, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x12,0xa5,0x50,0xb6,0xc8]
+          vfmaddsub231pd {ru-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd {rd-sae}, %zmm24, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x12,0xa5,0x30,0xb6,0xc8]
+          vfmaddsub231pd {rd-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd {rz-sae}, %zmm24, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x12,0xa5,0x70,0xb6,0xc8]
+          vfmaddsub231pd {rz-sae}, %zmm24, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd (%rcx), %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x40,0xb6,0x09]
+          vfmaddsub231pd (%rcx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x32,0xa5,0x40,0xb6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub231pd 291(%rax,%r14,8), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd (%rcx){1to8}, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x50,0xb6,0x09]
+          vfmaddsub231pd (%rcx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 8128(%rdx), %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x40,0xb6,0x4a,0x7f]
+          vfmaddsub231pd 8128(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 8192(%rdx), %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x40,0xb6,0x8a,0x00,0x20,0x00,0x00]
+          vfmaddsub231pd 8192(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -8192(%rdx), %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x40,0xb6,0x4a,0x80]
+          vfmaddsub231pd -8192(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -8256(%rdx), %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x40,0xb6,0x8a,0xc0,0xdf,0xff,0xff]
+          vfmaddsub231pd -8256(%rdx), %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 1016(%rdx){1to8}, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x50,0xb6,0x4a,0x7f]
+          vfmaddsub231pd 1016(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd 1024(%rdx){1to8}, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x50,0xb6,0x8a,0x00,0x04,0x00,0x00]
+          vfmaddsub231pd 1024(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -1024(%rdx){1to8}, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x50,0xb6,0x4a,0x80]
+          vfmaddsub231pd -1024(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmaddsub231pd -1032(%rdx){1to8}, %zmm27, %zmm9
+// CHECK:  encoding: [0x62,0x72,0xa5,0x50,0xb6,0x8a,0xf8,0xfb,0xff,0xff]
+          vfmaddsub231pd -1032(%rdx){1to8}, %zmm27, %zmm9
+
+// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xb2,0x15,0x48,0x97,0xd5]
+          vfmsubadd132ps %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7}
+// CHECK:  encoding: [0x62,0xb2,0x15,0x4f,0x97,0xd5]
+          vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7}
+
+// CHECK: vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} {z}
+// CHECK:  encoding: [0x62,0xb2,0x15,0xcf,0x97,0xd5]
+          vfmsubadd132ps %zmm21, %zmm13, %zmm2 {%k7} {z}
+
+// CHECK: vfmsubadd132ps {rn-sae}, %zmm21, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xb2,0x15,0x18,0x97,0xd5]
+          vfmsubadd132ps {rn-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps {ru-sae}, %zmm21, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xb2,0x15,0x58,0x97,0xd5]
+          vfmsubadd132ps {ru-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps {rd-sae}, %zmm21, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xb2,0x15,0x38,0x97,0xd5]
+          vfmsubadd132ps {rd-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps {rz-sae}, %zmm21, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xb2,0x15,0x78,0x97,0xd5]
+          vfmsubadd132ps {rz-sae}, %zmm21, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps (%rcx), %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x97,0x11]
+          vfmsubadd132ps (%rcx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xb2,0x15,0x48,0x97,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd132ps 291(%rax,%r14,8), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps (%rcx){1to16}, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x97,0x11]
+          vfmsubadd132ps (%rcx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 8128(%rdx), %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x97,0x52,0x7f]
+          vfmsubadd132ps 8128(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 8192(%rdx), %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x97,0x92,0x00,0x20,0x00,0x00]
+          vfmsubadd132ps 8192(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -8192(%rdx), %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x97,0x52,0x80]
+          vfmsubadd132ps -8192(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -8256(%rdx), %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x48,0x97,0x92,0xc0,0xdf,0xff,0xff]
+          vfmsubadd132ps -8256(%rdx), %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 508(%rdx){1to16}, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x97,0x52,0x7f]
+          vfmsubadd132ps 508(%rdx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps 512(%rdx){1to16}, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x97,0x92,0x00,0x02,0x00,0x00]
+          vfmsubadd132ps 512(%rdx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -512(%rdx){1to16}, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x97,0x52,0x80]
+          vfmsubadd132ps -512(%rdx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132ps -516(%rdx){1to16}, %zmm13, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0x15,0x58,0x97,0x92,0xfc,0xfd,0xff,0xff]
+          vfmsubadd132ps -516(%rdx){1to16}, %zmm13, %zmm2
+
+// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x40,0x97,0xea]
+          vfmsubadd132pd %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x47,0x97,0xea]
+          vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7}
+
+// CHECK: vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x9d,0xc7,0x97,0xea]
+          vfmsubadd132pd %zmm18, %zmm28, %zmm21 {%k7} {z}
+
+// CHECK: vfmsubadd132pd {rn-sae}, %zmm18, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x10,0x97,0xea]
+          vfmsubadd132pd {rn-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd {ru-sae}, %zmm18, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x50,0x97,0xea]
+          vfmsubadd132pd {ru-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd {rd-sae}, %zmm18, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x30,0x97,0xea]
+          vfmsubadd132pd {rd-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd {rz-sae}, %zmm18, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x70,0x97,0xea]
+          vfmsubadd132pd {rz-sae}, %zmm18, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd (%rcx), %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x97,0x29]
+          vfmsubadd132pd (%rcx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x40,0x97,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd132pd 291(%rax,%r14,8), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd (%rcx){1to8}, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x97,0x29]
+          vfmsubadd132pd (%rcx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 8128(%rdx), %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x97,0x6a,0x7f]
+          vfmsubadd132pd 8128(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 8192(%rdx), %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x97,0xaa,0x00,0x20,0x00,0x00]
+          vfmsubadd132pd 8192(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -8192(%rdx), %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x97,0x6a,0x80]
+          vfmsubadd132pd -8192(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -8256(%rdx), %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x97,0xaa,0xc0,0xdf,0xff,0xff]
+          vfmsubadd132pd -8256(%rdx), %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 1016(%rdx){1to8}, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x97,0x6a,0x7f]
+          vfmsubadd132pd 1016(%rdx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd 1024(%rdx){1to8}, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x97,0xaa,0x00,0x04,0x00,0x00]
+          vfmsubadd132pd 1024(%rdx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -1024(%rdx){1to8}, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x97,0x6a,0x80]
+          vfmsubadd132pd -1024(%rdx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd132pd -1032(%rdx){1to8}, %zmm28, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x97,0xaa,0xf8,0xfb,0xff,0xff]
+          vfmsubadd132pd -1032(%rdx){1to8}, %zmm28, %zmm21
+
+// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x1d,0x48,0xa7,0xf6]
+          vfmsubadd213ps %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6}
+// CHECK:  encoding: [0x62,0xc2,0x1d,0x4e,0xa7,0xf6]
+          vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6}
+
+// CHECK: vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6} {z}
+// CHECK:  encoding: [0x62,0xc2,0x1d,0xce,0xa7,0xf6]
+          vfmsubadd213ps %zmm14, %zmm12, %zmm22 {%k6} {z}
+
+// CHECK: vfmsubadd213ps {rn-sae}, %zmm14, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x1d,0x18,0xa7,0xf6]
+          vfmsubadd213ps {rn-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps {ru-sae}, %zmm14, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x1d,0x58,0xa7,0xf6]
+          vfmsubadd213ps {ru-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps {rd-sae}, %zmm14, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x1d,0x38,0xa7,0xf6]
+          vfmsubadd213ps {rd-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps {rz-sae}, %zmm14, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xc2,0x1d,0x78,0xa7,0xf6]
+          vfmsubadd213ps {rz-sae}, %zmm14, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps (%rcx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x31]
+          vfmsubadd213ps (%rcx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xa2,0x1d,0x48,0xa7,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd213ps 291(%rax,%r14,8), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps (%rcx){1to16}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x31]
+          vfmsubadd213ps (%rcx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 8128(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x72,0x7f]
+          vfmsubadd213ps 8128(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 8192(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x48,0xa7,0xb2,0x00,0x20,0x00,0x00]
+          vfmsubadd213ps 8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -8192(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x48,0xa7,0x72,0x80]
+          vfmsubadd213ps -8192(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -8256(%rdx), %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x48,0xa7,0xb2,0xc0,0xdf,0xff,0xff]
+          vfmsubadd213ps -8256(%rdx), %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 508(%rdx){1to16}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x72,0x7f]
+          vfmsubadd213ps 508(%rdx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps 512(%rdx){1to16}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x58,0xa7,0xb2,0x00,0x02,0x00,0x00]
+          vfmsubadd213ps 512(%rdx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -512(%rdx){1to16}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x58,0xa7,0x72,0x80]
+          vfmsubadd213ps -512(%rdx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213ps -516(%rdx){1to16}, %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x58,0xa7,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmsubadd213ps -516(%rdx){1to16}, %zmm12, %zmm22
+
+// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x40,0xa7,0xd2]
+          vfmsubadd213pd %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6}
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x46,0xa7,0xd2]
+          vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6}
+
+// CHECK: vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} {z}
+// CHECK:  encoding: [0x62,0xf2,0xc5,0xc6,0xa7,0xd2]
+          vfmsubadd213pd %zmm2, %zmm23, %zmm2 {%k6} {z}
+
+// CHECK: vfmsubadd213pd {rn-sae}, %zmm2, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x10,0xa7,0xd2]
+          vfmsubadd213pd {rn-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd {ru-sae}, %zmm2, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x50,0xa7,0xd2]
+          vfmsubadd213pd {ru-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd {rd-sae}, %zmm2, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x30,0xa7,0xd2]
+          vfmsubadd213pd {rd-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd {rz-sae}, %zmm2, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x70,0xa7,0xd2]
+          vfmsubadd213pd {rz-sae}, %zmm2, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd (%rcx), %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x11]
+          vfmsubadd213pd (%rcx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xb2,0xc5,0x40,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd213pd 291(%rax,%r14,8), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd (%rcx){1to8}, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x11]
+          vfmsubadd213pd (%rcx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 8128(%rdx), %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x52,0x7f]
+          vfmsubadd213pd 8128(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 8192(%rdx), %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x92,0x00,0x20,0x00,0x00]
+          vfmsubadd213pd 8192(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -8192(%rdx), %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x52,0x80]
+          vfmsubadd213pd -8192(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -8256(%rdx), %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x40,0xa7,0x92,0xc0,0xdf,0xff,0xff]
+          vfmsubadd213pd -8256(%rdx), %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 1016(%rdx){1to8}, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x52,0x7f]
+          vfmsubadd213pd 1016(%rdx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd 1024(%rdx){1to8}, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x92,0x00,0x04,0x00,0x00]
+          vfmsubadd213pd 1024(%rdx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -1024(%rdx){1to8}, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x52,0x80]
+          vfmsubadd213pd -1024(%rdx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd213pd -1032(%rdx){1to8}, %zmm23, %zmm2
+// CHECK:  encoding: [0x62,0xf2,0xc5,0x50,0xa7,0x92,0xf8,0xfb,0xff,0xff]
+          vfmsubadd213pd -1032(%rdx){1to8}, %zmm23, %zmm2
+
+// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x40,0xb7,0xc1]
+          vfmsubadd231ps %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2}
+// CHECK:  encoding: [0x62,0x72,0x65,0x42,0xb7,0xc1]
+          vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2}
+
+// CHECK: vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} {z}
+// CHECK:  encoding: [0x62,0x72,0x65,0xc2,0xb7,0xc1]
+          vfmsubadd231ps %zmm1, %zmm19, %zmm8 {%k2} {z}
+
+// CHECK: vfmsubadd231ps {rn-sae}, %zmm1, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x10,0xb7,0xc1]
+          vfmsubadd231ps {rn-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps {ru-sae}, %zmm1, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x50,0xb7,0xc1]
+          vfmsubadd231ps {ru-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps {rd-sae}, %zmm1, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x30,0xb7,0xc1]
+          vfmsubadd231ps {rd-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps {rz-sae}, %zmm1, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x70,0xb7,0xc1]
+          vfmsubadd231ps {rz-sae}, %zmm1, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps (%rcx), %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x40,0xb7,0x01]
+          vfmsubadd231ps (%rcx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x32,0x65,0x40,0xb7,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd231ps 291(%rax,%r14,8), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps (%rcx){1to16}, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x50,0xb7,0x01]
+          vfmsubadd231ps (%rcx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 8128(%rdx), %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x40,0xb7,0x42,0x7f]
+          vfmsubadd231ps 8128(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 8192(%rdx), %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x40,0xb7,0x82,0x00,0x20,0x00,0x00]
+          vfmsubadd231ps 8192(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -8192(%rdx), %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x40,0xb7,0x42,0x80]
+          vfmsubadd231ps -8192(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -8256(%rdx), %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x40,0xb7,0x82,0xc0,0xdf,0xff,0xff]
+          vfmsubadd231ps -8256(%rdx), %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 508(%rdx){1to16}, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x50,0xb7,0x42,0x7f]
+          vfmsubadd231ps 508(%rdx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps 512(%rdx){1to16}, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x50,0xb7,0x82,0x00,0x02,0x00,0x00]
+          vfmsubadd231ps 512(%rdx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -512(%rdx){1to16}, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x50,0xb7,0x42,0x80]
+          vfmsubadd231ps -512(%rdx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231ps -516(%rdx){1to16}, %zmm19, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x65,0x50,0xb7,0x82,0xfc,0xfd,0xff,0xff]
+          vfmsubadd231ps -516(%rdx){1to16}, %zmm19, %zmm8
+
+// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x40,0xb7,0xc5]
+          vfmsubadd231pd %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x42,0xb7,0xc5]
+          vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2}
+
+// CHECK: vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0xa5,0xc2,0xb7,0xc5]
+          vfmsubadd231pd %zmm21, %zmm27, %zmm16 {%k2} {z}
+
+// CHECK: vfmsubadd231pd {rn-sae}, %zmm21, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x10,0xb7,0xc5]
+          vfmsubadd231pd {rn-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd {ru-sae}, %zmm21, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x50,0xb7,0xc5]
+          vfmsubadd231pd {ru-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd {rd-sae}, %zmm21, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x30,0xb7,0xc5]
+          vfmsubadd231pd {rd-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd {rz-sae}, %zmm21, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x70,0xb7,0xc5]
+          vfmsubadd231pd {rz-sae}, %zmm21, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd (%rcx), %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x01]
+          vfmsubadd231pd (%rcx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x40,0xb7,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd231pd 291(%rax,%r14,8), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd (%rcx){1to8}, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x01]
+          vfmsubadd231pd (%rcx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 8128(%rdx), %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x42,0x7f]
+          vfmsubadd231pd 8128(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 8192(%rdx), %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x82,0x00,0x20,0x00,0x00]
+          vfmsubadd231pd 8192(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -8192(%rdx), %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x42,0x80]
+          vfmsubadd231pd -8192(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -8256(%rdx), %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x40,0xb7,0x82,0xc0,0xdf,0xff,0xff]
+          vfmsubadd231pd -8256(%rdx), %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 1016(%rdx){1to8}, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x42,0x7f]
+          vfmsubadd231pd 1016(%rdx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd 1024(%rdx){1to8}, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x82,0x00,0x04,0x00,0x00]
+          vfmsubadd231pd 1024(%rdx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -1024(%rdx){1to8}, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x42,0x80]
+          vfmsubadd231pd -1024(%rdx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfmsubadd231pd -1032(%rdx){1to8}, %zmm27, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x50,0xb7,0x82,0xf8,0xfb,0xff,0xff]
+          vfmsubadd231pd -1032(%rdx){1to8}, %zmm27, %zmm16
+
+// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x40,0x9c,0xea]
+          vfnmadd132ps %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x45,0x9c,0xea]
+          vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5}
+
+// CHECK: vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5} {z}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0xc5,0x9c,0xea]
+          vfnmadd132ps %zmm10, %zmm16, %zmm21 {%k5} {z}
+
+// CHECK: vfnmadd132ps {rn-sae}, %zmm10, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x10,0x9c,0xea]
+          vfnmadd132ps {rn-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps {ru-sae}, %zmm10, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x50,0x9c,0xea]
+          vfnmadd132ps {ru-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps {rd-sae}, %zmm10, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x30,0x9c,0xea]
+          vfnmadd132ps {rd-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps {rz-sae}, %zmm10, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x70,0x9c,0xea]
+          vfnmadd132ps {rz-sae}, %zmm10, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps (%rcx), %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x29]
+          vfnmadd132ps (%rcx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 291(%rax,%r14,8), %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x40,0x9c,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132ps 291(%rax,%r14,8), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps (%rcx){1to16}, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x29]
+          vfnmadd132ps (%rcx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 8128(%rdx), %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x6a,0x7f]
+          vfnmadd132ps 8128(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 8192(%rdx), %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x40,0x9c,0xaa,0x00,0x20,0x00,0x00]
+          vfnmadd132ps 8192(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -8192(%rdx), %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x40,0x9c,0x6a,0x80]
+          vfnmadd132ps -8192(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -8256(%rdx), %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x40,0x9c,0xaa,0xc0,0xdf,0xff,0xff]
+          vfnmadd132ps -8256(%rdx), %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 508(%rdx){1to16}, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x6a,0x7f]
+          vfnmadd132ps 508(%rdx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps 512(%rdx){1to16}, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x50,0x9c,0xaa,0x00,0x02,0x00,0x00]
+          vfnmadd132ps 512(%rdx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -512(%rdx){1to16}, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x50,0x9c,0x6a,0x80]
+          vfnmadd132ps -512(%rdx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132ps -516(%rdx){1to16}, %zmm16, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x50,0x9c,0xaa,0xfc,0xfd,0xff,0xff]
+          vfnmadd132ps -516(%rdx){1to16}, %zmm16, %zmm21
+
+// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x48,0x9c,0xe1]
+          vfnmadd132pd %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7}
+// CHECK:  encoding: [0x62,0x72,0x8d,0x4f,0x9c,0xe1]
+          vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7}
+
+// CHECK: vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} {z}
+// CHECK:  encoding: [0x62,0x72,0x8d,0xcf,0x9c,0xe1]
+          vfnmadd132pd %zmm1, %zmm14, %zmm12 {%k7} {z}
+
+// CHECK: vfnmadd132pd {rn-sae}, %zmm1, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x18,0x9c,0xe1]
+          vfnmadd132pd {rn-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd {ru-sae}, %zmm1, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x58,0x9c,0xe1]
+          vfnmadd132pd {ru-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd {rd-sae}, %zmm1, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x38,0x9c,0xe1]
+          vfnmadd132pd {rd-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd {rz-sae}, %zmm1, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x78,0x9c,0xe1]
+          vfnmadd132pd {rz-sae}, %zmm1, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd (%rcx), %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x48,0x9c,0x21]
+          vfnmadd132pd (%rcx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 291(%rax,%r14,8), %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x32,0x8d,0x48,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132pd 291(%rax,%r14,8), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd (%rcx){1to8}, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x58,0x9c,0x21]
+          vfnmadd132pd (%rcx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 8128(%rdx), %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x48,0x9c,0x62,0x7f]
+          vfnmadd132pd 8128(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 8192(%rdx), %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x48,0x9c,0xa2,0x00,0x20,0x00,0x00]
+          vfnmadd132pd 8192(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -8192(%rdx), %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x48,0x9c,0x62,0x80]
+          vfnmadd132pd -8192(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -8256(%rdx), %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x48,0x9c,0xa2,0xc0,0xdf,0xff,0xff]
+          vfnmadd132pd -8256(%rdx), %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 1016(%rdx){1to8}, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x58,0x9c,0x62,0x7f]
+          vfnmadd132pd 1016(%rdx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd 1024(%rdx){1to8}, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x58,0x9c,0xa2,0x00,0x04,0x00,0x00]
+          vfnmadd132pd 1024(%rdx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -1024(%rdx){1to8}, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x58,0x9c,0x62,0x80]
+          vfnmadd132pd -1024(%rdx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd132pd -1032(%rdx){1to8}, %zmm14, %zmm12
+// CHECK:  encoding: [0x62,0x72,0x8d,0x58,0x9c,0xa2,0xf8,0xfb,0xff,0xff]
+          vfnmadd132pd -1032(%rdx){1to8}, %zmm14, %zmm12
+
+// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x48,0xac,0xd6]
+          vfnmadd213ps %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6}
+// CHECK:  encoding: [0x62,0x62,0x2d,0x4e,0xac,0xd6]
+          vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6}
+
+// CHECK: vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} {z}
+// CHECK:  encoding: [0x62,0x62,0x2d,0xce,0xac,0xd6]
+          vfnmadd213ps %zmm6, %zmm10, %zmm26 {%k6} {z}
+
+// CHECK: vfnmadd213ps {rn-sae}, %zmm6, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x18,0xac,0xd6]
+          vfnmadd213ps {rn-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps {ru-sae}, %zmm6, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x58,0xac,0xd6]
+          vfnmadd213ps {ru-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps {rd-sae}, %zmm6, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x38,0xac,0xd6]
+          vfnmadd213ps {rd-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps {rz-sae}, %zmm6, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x78,0xac,0xd6]
+          vfnmadd213ps {rz-sae}, %zmm6, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps (%rcx), %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x48,0xac,0x11]
+          vfnmadd213ps (%rcx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 291(%rax,%r14,8), %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x22,0x2d,0x48,0xac,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213ps 291(%rax,%r14,8), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps (%rcx){1to16}, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x58,0xac,0x11]
+          vfnmadd213ps (%rcx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 8128(%rdx), %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x48,0xac,0x52,0x7f]
+          vfnmadd213ps 8128(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 8192(%rdx), %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x48,0xac,0x92,0x00,0x20,0x00,0x00]
+          vfnmadd213ps 8192(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -8192(%rdx), %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x48,0xac,0x52,0x80]
+          vfnmadd213ps -8192(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -8256(%rdx), %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x48,0xac,0x92,0xc0,0xdf,0xff,0xff]
+          vfnmadd213ps -8256(%rdx), %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 508(%rdx){1to16}, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x58,0xac,0x52,0x7f]
+          vfnmadd213ps 508(%rdx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps 512(%rdx){1to16}, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x58,0xac,0x92,0x00,0x02,0x00,0x00]
+          vfnmadd213ps 512(%rdx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -512(%rdx){1to16}, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x58,0xac,0x52,0x80]
+          vfnmadd213ps -512(%rdx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213ps -516(%rdx){1to16}, %zmm10, %zmm26
+// CHECK:  encoding: [0x62,0x62,0x2d,0x58,0xac,0x92,0xfc,0xfd,0xff,0xff]
+          vfnmadd213ps -516(%rdx){1to16}, %zmm10, %zmm26
+
+// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x40,0xac,0xc9]
+          vfnmadd213pd %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x44,0xac,0xc9]
+          vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4}
+
+// CHECK: vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} {z}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0xc4,0xac,0xc9]
+          vfnmadd213pd %zmm9, %zmm16, %zmm17 {%k4} {z}
+
+// CHECK: vfnmadd213pd {rn-sae}, %zmm9, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x10,0xac,0xc9]
+          vfnmadd213pd {rn-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd {ru-sae}, %zmm9, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x50,0xac,0xc9]
+          vfnmadd213pd {ru-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd {rd-sae}, %zmm9, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x30,0xac,0xc9]
+          vfnmadd213pd {rd-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd {rz-sae}, %zmm9, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x70,0xac,0xc9]
+          vfnmadd213pd {rz-sae}, %zmm9, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd (%rcx), %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xac,0x09]
+          vfnmadd213pd (%rcx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 291(%rax,%r14,8), %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x40,0xac,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213pd 291(%rax,%r14,8), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd (%rcx){1to8}, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xac,0x09]
+          vfnmadd213pd (%rcx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 8128(%rdx), %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xac,0x4a,0x7f]
+          vfnmadd213pd 8128(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 8192(%rdx), %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xac,0x8a,0x00,0x20,0x00,0x00]
+          vfnmadd213pd 8192(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -8192(%rdx), %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xac,0x4a,0x80]
+          vfnmadd213pd -8192(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -8256(%rdx), %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x40,0xac,0x8a,0xc0,0xdf,0xff,0xff]
+          vfnmadd213pd -8256(%rdx), %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xac,0x4a,0x7f]
+          vfnmadd213pd 1016(%rdx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xac,0x8a,0x00,0x04,0x00,0x00]
+          vfnmadd213pd 1024(%rdx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xac,0x4a,0x80]
+          vfnmadd213pd -1024(%rdx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x50,0xac,0x8a,0xf8,0xfb,0xff,0xff]
+          vfnmadd213pd -1032(%rdx){1to8}, %zmm16, %zmm17
+
+// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x12,0x45,0x48,0xbc,0xf0]
+          vfnmadd231ps %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5}
+// CHECK:  encoding: [0x62,0x12,0x45,0x4d,0xbc,0xf0]
+          vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5}
+
+// CHECK: vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} {z}
+// CHECK:  encoding: [0x62,0x12,0x45,0xcd,0xbc,0xf0]
+          vfnmadd231ps %zmm24, %zmm7, %zmm14 {%k5} {z}
+
+// CHECK: vfnmadd231ps {rn-sae}, %zmm24, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x12,0x45,0x18,0xbc,0xf0]
+          vfnmadd231ps {rn-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps {ru-sae}, %zmm24, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x12,0x45,0x58,0xbc,0xf0]
+          vfnmadd231ps {ru-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps {rd-sae}, %zmm24, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x12,0x45,0x38,0xbc,0xf0]
+          vfnmadd231ps {rd-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps {rz-sae}, %zmm24, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x12,0x45,0x78,0xbc,0xf0]
+          vfnmadd231ps {rz-sae}, %zmm24, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps (%rcx), %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x48,0xbc,0x31]
+          vfnmadd231ps (%rcx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 291(%rax,%r14,8), %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x32,0x45,0x48,0xbc,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231ps 291(%rax,%r14,8), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps (%rcx){1to16}, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x58,0xbc,0x31]
+          vfnmadd231ps (%rcx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 8128(%rdx), %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x48,0xbc,0x72,0x7f]
+          vfnmadd231ps 8128(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 8192(%rdx), %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x48,0xbc,0xb2,0x00,0x20,0x00,0x00]
+          vfnmadd231ps 8192(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -8192(%rdx), %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x48,0xbc,0x72,0x80]
+          vfnmadd231ps -8192(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -8256(%rdx), %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x48,0xbc,0xb2,0xc0,0xdf,0xff,0xff]
+          vfnmadd231ps -8256(%rdx), %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 508(%rdx){1to16}, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x58,0xbc,0x72,0x7f]
+          vfnmadd231ps 508(%rdx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps 512(%rdx){1to16}, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x58,0xbc,0xb2,0x00,0x02,0x00,0x00]
+          vfnmadd231ps 512(%rdx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -512(%rdx){1to16}, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x58,0xbc,0x72,0x80]
+          vfnmadd231ps -512(%rdx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231ps -516(%rdx){1to16}, %zmm7, %zmm14
+// CHECK:  encoding: [0x62,0x72,0x45,0x58,0xbc,0xb2,0xfc,0xfd,0xff,0xff]
+          vfnmadd231ps -516(%rdx){1to16}, %zmm7, %zmm14
+
+// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x32,0xa5,0x48,0xbc,0xe0]
+          vfnmadd231pd %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6}
+// CHECK:  encoding: [0x62,0x32,0xa5,0x4e,0xbc,0xe0]
+          vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6}
+
+// CHECK: vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} {z}
+// CHECK:  encoding: [0x62,0x32,0xa5,0xce,0xbc,0xe0]
+          vfnmadd231pd %zmm16, %zmm11, %zmm12 {%k6} {z}
+
+// CHECK: vfnmadd231pd {rn-sae}, %zmm16, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x32,0xa5,0x18,0xbc,0xe0]
+          vfnmadd231pd {rn-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd {ru-sae}, %zmm16, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x32,0xa5,0x58,0xbc,0xe0]
+          vfnmadd231pd {ru-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd {rd-sae}, %zmm16, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x32,0xa5,0x38,0xbc,0xe0]
+          vfnmadd231pd {rd-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd {rz-sae}, %zmm16, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x32,0xa5,0x78,0xbc,0xe0]
+          vfnmadd231pd {rz-sae}, %zmm16, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd (%rcx), %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x48,0xbc,0x21]
+          vfnmadd231pd (%rcx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 291(%rax,%r14,8), %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x32,0xa5,0x48,0xbc,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231pd 291(%rax,%r14,8), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd (%rcx){1to8}, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x58,0xbc,0x21]
+          vfnmadd231pd (%rcx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 8128(%rdx), %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x48,0xbc,0x62,0x7f]
+          vfnmadd231pd 8128(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 8192(%rdx), %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x48,0xbc,0xa2,0x00,0x20,0x00,0x00]
+          vfnmadd231pd 8192(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -8192(%rdx), %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x48,0xbc,0x62,0x80]
+          vfnmadd231pd -8192(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -8256(%rdx), %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x48,0xbc,0xa2,0xc0,0xdf,0xff,0xff]
+          vfnmadd231pd -8256(%rdx), %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 1016(%rdx){1to8}, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x58,0xbc,0x62,0x7f]
+          vfnmadd231pd 1016(%rdx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd 1024(%rdx){1to8}, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x58,0xbc,0xa2,0x00,0x04,0x00,0x00]
+          vfnmadd231pd 1024(%rdx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -1024(%rdx){1to8}, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x58,0xbc,0x62,0x80]
+          vfnmadd231pd -1024(%rdx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmadd231pd -1032(%rdx){1to8}, %zmm11, %zmm12
+// CHECK:  encoding: [0x62,0x72,0xa5,0x58,0xbc,0xa2,0xf8,0xfb,0xff,0xff]
+          vfnmadd231pd -1032(%rdx){1to8}, %zmm11, %zmm12
+
+// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xe6]
+          vfnmsub132ps %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2}
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x42,0x9e,0xe6]
+          vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2}
+
+// CHECK: vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} {z}
+// CHECK:  encoding: [0x62,0xf2,0x6d,0xc2,0x9e,0xe6]
+          vfnmsub132ps %zmm6, %zmm18, %zmm4 {%k2} {z}
+
+// CHECK: vfnmsub132ps {rn-sae}, %zmm6, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x10,0x9e,0xe6]
+          vfnmsub132ps {rn-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps {ru-sae}, %zmm6, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xe6]
+          vfnmsub132ps {ru-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps {rd-sae}, %zmm6, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x30,0x9e,0xe6]
+          vfnmsub132ps {rd-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps {rz-sae}, %zmm6, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x70,0x9e,0xe6]
+          vfnmsub132ps {rz-sae}, %zmm6, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps (%rcx), %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x21]
+          vfnmsub132ps (%rcx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 291(%rax,%r14,8), %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xb2,0x6d,0x40,0x9e,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132ps 291(%rax,%r14,8), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps (%rcx){1to16}, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x21]
+          vfnmsub132ps (%rcx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 8128(%rdx), %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x62,0x7f]
+          vfnmsub132ps 8128(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 8192(%rdx), %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xa2,0x00,0x20,0x00,0x00]
+          vfnmsub132ps 8192(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -8192(%rdx), %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x40,0x9e,0x62,0x80]
+          vfnmsub132ps -8192(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -8256(%rdx), %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x40,0x9e,0xa2,0xc0,0xdf,0xff,0xff]
+          vfnmsub132ps -8256(%rdx), %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 508(%rdx){1to16}, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x62,0x7f]
+          vfnmsub132ps 508(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps 512(%rdx){1to16}, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xa2,0x00,0x02,0x00,0x00]
+          vfnmsub132ps 512(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -512(%rdx){1to16}, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x50,0x9e,0x62,0x80]
+          vfnmsub132ps -512(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132ps -516(%rdx){1to16}, %zmm18, %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x6d,0x50,0x9e,0xa2,0xfc,0xfd,0xff,0xff]
+          vfnmsub132ps -516(%rdx){1to16}, %zmm18, %zmm4
+
+// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x48,0x9e,0xe6]
+          vfnmsub132pd %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2}
+// CHECK:  encoding: [0x62,0x62,0xd5,0x4a,0x9e,0xe6]
+          vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2}
+
+// CHECK: vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} {z}
+// CHECK:  encoding: [0x62,0x62,0xd5,0xca,0x9e,0xe6]
+          vfnmsub132pd %zmm6, %zmm5, %zmm28 {%k2} {z}
+
+// CHECK: vfnmsub132pd {rn-sae}, %zmm6, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x18,0x9e,0xe6]
+          vfnmsub132pd {rn-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd {ru-sae}, %zmm6, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x58,0x9e,0xe6]
+          vfnmsub132pd {ru-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd {rd-sae}, %zmm6, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x38,0x9e,0xe6]
+          vfnmsub132pd {rd-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd {rz-sae}, %zmm6, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x78,0x9e,0xe6]
+          vfnmsub132pd {rz-sae}, %zmm6, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd (%rcx), %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x48,0x9e,0x21]
+          vfnmsub132pd (%rcx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 291(%rax,%r14,8), %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x22,0xd5,0x48,0x9e,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132pd 291(%rax,%r14,8), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd (%rcx){1to8}, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x58,0x9e,0x21]
+          vfnmsub132pd (%rcx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 8128(%rdx), %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x48,0x9e,0x62,0x7f]
+          vfnmsub132pd 8128(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 8192(%rdx), %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x48,0x9e,0xa2,0x00,0x20,0x00,0x00]
+          vfnmsub132pd 8192(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -8192(%rdx), %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x48,0x9e,0x62,0x80]
+          vfnmsub132pd -8192(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -8256(%rdx), %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x48,0x9e,0xa2,0xc0,0xdf,0xff,0xff]
+          vfnmsub132pd -8256(%rdx), %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 1016(%rdx){1to8}, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x58,0x9e,0x62,0x7f]
+          vfnmsub132pd 1016(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd 1024(%rdx){1to8}, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x58,0x9e,0xa2,0x00,0x04,0x00,0x00]
+          vfnmsub132pd 1024(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -1024(%rdx){1to8}, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x58,0x9e,0x62,0x80]
+          vfnmsub132pd -1024(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub132pd -1032(%rdx){1to8}, %zmm5, %zmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x58,0x9e,0xa2,0xf8,0xfb,0xff,0xff]
+          vfnmsub132pd -1032(%rdx){1to8}, %zmm5, %zmm28
+
+// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x48,0xae,0xea]
+          vfnmsub213ps %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3}
+// CHECK:  encoding: [0x62,0xe2,0x15,0x4b,0xae,0xea]
+          vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3}
+
+// CHECK: vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} {z}
+// CHECK:  encoding: [0x62,0xe2,0x15,0xcb,0xae,0xea]
+          vfnmsub213ps %zmm2, %zmm13, %zmm21 {%k3} {z}
+
+// CHECK: vfnmsub213ps {rn-sae}, %zmm2, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x18,0xae,0xea]
+          vfnmsub213ps {rn-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps {ru-sae}, %zmm2, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x58,0xae,0xea]
+          vfnmsub213ps {ru-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps {rd-sae}, %zmm2, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x38,0xae,0xea]
+          vfnmsub213ps {rd-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps {rz-sae}, %zmm2, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x78,0xae,0xea]
+          vfnmsub213ps {rz-sae}, %zmm2, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps (%rcx), %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x48,0xae,0x29]
+          vfnmsub213ps (%rcx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 291(%rax,%r14,8), %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x15,0x48,0xae,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213ps 291(%rax,%r14,8), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps (%rcx){1to16}, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x58,0xae,0x29]
+          vfnmsub213ps (%rcx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 8128(%rdx), %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x48,0xae,0x6a,0x7f]
+          vfnmsub213ps 8128(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 8192(%rdx), %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x48,0xae,0xaa,0x00,0x20,0x00,0x00]
+          vfnmsub213ps 8192(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -8192(%rdx), %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x48,0xae,0x6a,0x80]
+          vfnmsub213ps -8192(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -8256(%rdx), %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x48,0xae,0xaa,0xc0,0xdf,0xff,0xff]
+          vfnmsub213ps -8256(%rdx), %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 508(%rdx){1to16}, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x58,0xae,0x6a,0x7f]
+          vfnmsub213ps 508(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps 512(%rdx){1to16}, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x58,0xae,0xaa,0x00,0x02,0x00,0x00]
+          vfnmsub213ps 512(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -512(%rdx){1to16}, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x58,0xae,0x6a,0x80]
+          vfnmsub213ps -512(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213ps -516(%rdx){1to16}, %zmm13, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x15,0x58,0xae,0xaa,0xfc,0xfd,0xff,0xff]
+          vfnmsub213ps -516(%rdx){1to16}, %zmm13, %zmm21
+
+// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xc2,0xed,0x40,0xae,0xfb]
+          vfnmsub213pd %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2}
+// CHECK:  encoding: [0x62,0xc2,0xed,0x42,0xae,0xfb]
+          vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2}
+
+// CHECK: vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2} {z}
+// CHECK:  encoding: [0x62,0xc2,0xed,0xc2,0xae,0xfb]
+          vfnmsub213pd %zmm11, %zmm18, %zmm23 {%k2} {z}
+
+// CHECK: vfnmsub213pd {rn-sae}, %zmm11, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xc2,0xed,0x10,0xae,0xfb]
+          vfnmsub213pd {rn-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd {ru-sae}, %zmm11, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xc2,0xed,0x50,0xae,0xfb]
+          vfnmsub213pd {ru-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd {rd-sae}, %zmm11, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xc2,0xed,0x30,0xae,0xfb]
+          vfnmsub213pd {rd-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd {rz-sae}, %zmm11, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xc2,0xed,0x70,0xae,0xfb]
+          vfnmsub213pd {rz-sae}, %zmm11, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd (%rcx), %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x40,0xae,0x39]
+          vfnmsub213pd (%rcx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 291(%rax,%r14,8), %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xa2,0xed,0x40,0xae,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213pd 291(%rax,%r14,8), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd (%rcx){1to8}, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x50,0xae,0x39]
+          vfnmsub213pd (%rcx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 8128(%rdx), %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x40,0xae,0x7a,0x7f]
+          vfnmsub213pd 8128(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 8192(%rdx), %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x40,0xae,0xba,0x00,0x20,0x00,0x00]
+          vfnmsub213pd 8192(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -8192(%rdx), %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x40,0xae,0x7a,0x80]
+          vfnmsub213pd -8192(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -8256(%rdx), %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x40,0xae,0xba,0xc0,0xdf,0xff,0xff]
+          vfnmsub213pd -8256(%rdx), %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 1016(%rdx){1to8}, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x50,0xae,0x7a,0x7f]
+          vfnmsub213pd 1016(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd 1024(%rdx){1to8}, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x50,0xae,0xba,0x00,0x04,0x00,0x00]
+          vfnmsub213pd 1024(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -1024(%rdx){1to8}, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x50,0xae,0x7a,0x80]
+          vfnmsub213pd -1024(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub213pd -1032(%rdx){1to8}, %zmm18, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0xed,0x50,0xae,0xba,0xf8,0xfb,0xff,0xff]
+          vfnmsub213pd -1032(%rdx){1to8}, %zmm18, %zmm23
+
+// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x52,0x4d,0x48,0xbe,0xc5]
+          vfnmsub231ps %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2}
+// CHECK:  encoding: [0x62,0x52,0x4d,0x4a,0xbe,0xc5]
+          vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2}
+
+// CHECK: vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} {z}
+// CHECK:  encoding: [0x62,0x52,0x4d,0xca,0xbe,0xc5]
+          vfnmsub231ps %zmm13, %zmm6, %zmm8 {%k2} {z}
+
+// CHECK: vfnmsub231ps {rn-sae}, %zmm13, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x52,0x4d,0x18,0xbe,0xc5]
+          vfnmsub231ps {rn-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps {ru-sae}, %zmm13, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x52,0x4d,0x58,0xbe,0xc5]
+          vfnmsub231ps {ru-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps {rd-sae}, %zmm13, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x52,0x4d,0x38,0xbe,0xc5]
+          vfnmsub231ps {rd-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps {rz-sae}, %zmm13, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x52,0x4d,0x78,0xbe,0xc5]
+          vfnmsub231ps {rz-sae}, %zmm13, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps (%rcx), %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x48,0xbe,0x01]
+          vfnmsub231ps (%rcx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 291(%rax,%r14,8), %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x32,0x4d,0x48,0xbe,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231ps 291(%rax,%r14,8), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps (%rcx){1to16}, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x58,0xbe,0x01]
+          vfnmsub231ps (%rcx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 8128(%rdx), %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x48,0xbe,0x42,0x7f]
+          vfnmsub231ps 8128(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 8192(%rdx), %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x48,0xbe,0x82,0x00,0x20,0x00,0x00]
+          vfnmsub231ps 8192(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -8192(%rdx), %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x48,0xbe,0x42,0x80]
+          vfnmsub231ps -8192(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -8256(%rdx), %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x48,0xbe,0x82,0xc0,0xdf,0xff,0xff]
+          vfnmsub231ps -8256(%rdx), %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 508(%rdx){1to16}, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x58,0xbe,0x42,0x7f]
+          vfnmsub231ps 508(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps 512(%rdx){1to16}, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x58,0xbe,0x82,0x00,0x02,0x00,0x00]
+          vfnmsub231ps 512(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -512(%rdx){1to16}, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x58,0xbe,0x42,0x80]
+          vfnmsub231ps -512(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231ps -516(%rdx){1to16}, %zmm6, %zmm8
+// CHECK:  encoding: [0x62,0x72,0x4d,0x58,0xbe,0x82,0xfc,0xfd,0xff,0xff]
+          vfnmsub231ps -516(%rdx){1to16}, %zmm6, %zmm8
+
+// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x02,0xdd,0x48,0xbe,0xe8]
+          vfnmsub231pd %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7}
+// CHECK:  encoding: [0x62,0x02,0xdd,0x4f,0xbe,0xe8]
+          vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7}
+
+// CHECK: vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0xdd,0xcf,0xbe,0xe8]
+          vfnmsub231pd %zmm24, %zmm4, %zmm29 {%k7} {z}
+
+// CHECK: vfnmsub231pd {rn-sae}, %zmm24, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x02,0xdd,0x18,0xbe,0xe8]
+          vfnmsub231pd {rn-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd {ru-sae}, %zmm24, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x02,0xdd,0x58,0xbe,0xe8]
+          vfnmsub231pd {ru-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd {rd-sae}, %zmm24, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x02,0xdd,0x38,0xbe,0xe8]
+          vfnmsub231pd {rd-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd {rz-sae}, %zmm24, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x02,0xdd,0x78,0xbe,0xe8]
+          vfnmsub231pd {rz-sae}, %zmm24, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd (%rcx), %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x48,0xbe,0x29]
+          vfnmsub231pd (%rcx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 291(%rax,%r14,8), %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x22,0xdd,0x48,0xbe,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231pd 291(%rax,%r14,8), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd (%rcx){1to8}, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x58,0xbe,0x29]
+          vfnmsub231pd (%rcx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 8128(%rdx), %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x48,0xbe,0x6a,0x7f]
+          vfnmsub231pd 8128(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 8192(%rdx), %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x48,0xbe,0xaa,0x00,0x20,0x00,0x00]
+          vfnmsub231pd 8192(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -8192(%rdx), %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x48,0xbe,0x6a,0x80]
+          vfnmsub231pd -8192(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -8256(%rdx), %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x48,0xbe,0xaa,0xc0,0xdf,0xff,0xff]
+          vfnmsub231pd -8256(%rdx), %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 1016(%rdx){1to8}, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x58,0xbe,0x6a,0x7f]
+          vfnmsub231pd 1016(%rdx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd 1024(%rdx){1to8}, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x58,0xbe,0xaa,0x00,0x04,0x00,0x00]
+          vfnmsub231pd 1024(%rdx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -1024(%rdx){1to8}, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x58,0xbe,0x6a,0x80]
+          vfnmsub231pd -1024(%rdx){1to8}, %zmm4, %zmm29
+
+// CHECK: vfnmsub231pd -1032(%rdx){1to8}, %zmm4, %zmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x58,0xbe,0xaa,0xf8,0xfb,0xff,0xff]
+          vfnmsub231pd -1032(%rdx){1to8}, %zmm4, %zmm29
+
+
+// CHECK: vfmadd231ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0xb8,0x9a,0x00,0x20,0x00,0x00]
+          vfmadd231ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0x93,0xb8,0xd9]
+          vfmadd231ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0xb8,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd231ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd231pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0xb8,0x9a,0x00,0x20,0x00,0x00]
+          vfmadd231pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x9d,0x93,0xb8,0xd9]
+          vfmadd231pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd231pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0xb8,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd231pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd213ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0xa8,0x9a,0x00,0x20,0x00,0x00]
+          vfmadd213ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0x93,0xa8,0xd9]
+          vfmadd213ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0xa8,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd213ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd213pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0xa8,0x9a,0x00,0x20,0x00,0x00]
+          vfmadd213pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x9d,0x93,0xa8,0xd9]
+          vfmadd213pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd213pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0xa8,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd213pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd132ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x1d,0xc3,0x98,0x9a,0x00,0x20,0x00,0x00]
+          vfmadd132ps 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x1d,0x93,0x98,0xd9]
+          vfmadd132ps {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x5d,0xd3,0x98,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd132ps -1032(%rdx){1to16}, %zmm20, %zmm24 {%k3} {z}
+
+// CHECK: vfmadd132pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0x9d,0xc3,0x98,0x9a,0x00,0x20,0x00,0x00]
+          vfmadd132pd 8192(%rdx), %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x9d,0x93,0x98,0xd9]
+          vfmadd132pd {rn-sae}, %zmm25, %zmm28, %zmm27 {%k3} {z}
+
+// CHECK: vfmadd132pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x62,0xdd,0xd3,0x98,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd132pd -1032(%rdx){1to8}, %zmm20, %zmm24 {%k3} {z}
+
 // CHECK: vpermi2d %zmm4, %zmm28, %zmm10
 // CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0xd4]
           vpermi2d %zmm4, %zmm28, %zmm10
 
-// CHECK: vpermi2d %zmm4, %zmm28, %zmm10 {%k5}
-// CHECK:  encoding: [0x62,0x72,0x1d,0x45,0x76,0xd4]
-          vpermi2d %zmm4, %zmm28, %zmm10 {%k5}
+// CHECK: vpermi2d %zmm4, %zmm28, %zmm10 {%k5}
+// CHECK:  encoding: [0x62,0x72,0x1d,0x45,0x76,0xd4]
+          vpermi2d %zmm4, %zmm28, %zmm10 {%k5}
+
+// CHECK: vpermi2d %zmm4, %zmm28, %zmm10 {%k5} {z}
+// CHECK:  encoding: [0x62,0x72,0x1d,0xc5,0x76,0xd4]
+          vpermi2d %zmm4, %zmm28, %zmm10 {%k5} {z}
+
+// CHECK: vpermi2d (%rcx), %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x11]
+          vpermi2d (%rcx), %zmm28, %zmm10
+
+// CHECK: vpermi2d 291(%rax,%r14,8), %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x32,0x1d,0x40,0x76,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpermi2d 291(%rax,%r14,8), %zmm28, %zmm10
+
+// CHECK: vpermi2d (%rcx){1to16}, %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x11]
+          vpermi2d (%rcx){1to16}, %zmm28, %zmm10
+
+// CHECK: vpermi2d 8128(%rdx), %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x52,0x7f]
+          vpermi2d 8128(%rdx), %zmm28, %zmm10
+
+// CHECK: vpermi2d 8192(%rdx), %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x92,0x00,0x20,0x00,0x00]
+          vpermi2d 8192(%rdx), %zmm28, %zmm10
+
+// CHECK: vpermi2d -8192(%rdx), %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x52,0x80]
+          vpermi2d -8192(%rdx), %zmm28, %zmm10
+
+// CHECK: vpermi2d -8256(%rdx), %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x92,0xc0,0xdf,0xff,0xff]
+          vpermi2d -8256(%rdx), %zmm28, %zmm10
+
+// CHECK: vpermi2d 508(%rdx){1to16}, %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x52,0x7f]
+          vpermi2d 508(%rdx){1to16}, %zmm28, %zmm10
+
+// CHECK: vpermi2d 512(%rdx){1to16}, %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x92,0x00,0x02,0x00,0x00]
+          vpermi2d 512(%rdx){1to16}, %zmm28, %zmm10
+
+// CHECK: vpermi2d -512(%rdx){1to16}, %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x52,0x80]
+          vpermi2d -512(%rdx){1to16}, %zmm28, %zmm10
+
+// CHECK: vpermi2d -516(%rdx){1to16}, %zmm28, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x92,0xfc,0xfd,0xff,0xff]
+          vpermi2d -516(%rdx){1to16}, %zmm28, %zmm10
+
+// CHECK: vpermi2q %zmm28, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0x82,0x9d,0x40,0x76,0xd4]
+          vpermi2q %zmm28, %zmm28, %zmm18
+
+// CHECK: vpermi2q %zmm28, %zmm28, %zmm18 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x9d,0x42,0x76,0xd4]
+          vpermi2q %zmm28, %zmm28, %zmm18 {%k2}
+
+// CHECK: vpermi2q %zmm28, %zmm28, %zmm18 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x9d,0xc2,0x76,0xd4]
+          vpermi2q %zmm28, %zmm28, %zmm18 {%k2} {z}
+
+// CHECK: vpermi2q (%rcx), %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x11]
+          vpermi2q (%rcx), %zmm28, %zmm18
+
+// CHECK: vpermi2q 291(%rax,%r14,8), %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x40,0x76,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpermi2q 291(%rax,%r14,8), %zmm28, %zmm18
+
+// CHECK: vpermi2q (%rcx){1to8}, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x11]
+          vpermi2q (%rcx){1to8}, %zmm28, %zmm18
+
+// CHECK: vpermi2q 8128(%rdx), %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x52,0x7f]
+          vpermi2q 8128(%rdx), %zmm28, %zmm18
+
+// CHECK: vpermi2q 8192(%rdx), %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x92,0x00,0x20,0x00,0x00]
+          vpermi2q 8192(%rdx), %zmm28, %zmm18
+
+// CHECK: vpermi2q -8192(%rdx), %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x52,0x80]
+          vpermi2q -8192(%rdx), %zmm28, %zmm18
+
+// CHECK: vpermi2q -8256(%rdx), %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x92,0xc0,0xdf,0xff,0xff]
+          vpermi2q -8256(%rdx), %zmm28, %zmm18
+
+// CHECK: vpermi2q 1016(%rdx){1to8}, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x52,0x7f]
+          vpermi2q 1016(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vpermi2q 1024(%rdx){1to8}, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x92,0x00,0x04,0x00,0x00]
+          vpermi2q 1024(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vpermi2q -1024(%rdx){1to8}, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x52,0x80]
+          vpermi2q -1024(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vpermi2q -1032(%rdx){1to8}, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x92,0xf8,0xfb,0xff,0xff]
+          vpermi2q -1032(%rdx){1to8}, %zmm28, %zmm18
+
+// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x42,0x45,0x40,0x77,0xc0]
+          vpermi2ps %zmm8, %zmm23, %zmm24
+
+// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24 {%k2}
+// CHECK:  encoding: [0x62,0x42,0x45,0x42,0x77,0xc0]
+          vpermi2ps %zmm8, %zmm23, %zmm24 {%k2}
+
+// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x42,0x45,0xc2,0x77,0xc0]
+          vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} {z}
+
+// CHECK: vpermi2ps (%rcx), %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x01]
+          vpermi2ps (%rcx), %zmm23, %zmm24
+
+// CHECK: vpermi2ps 291(%rax,%r14,8), %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x22,0x45,0x40,0x77,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpermi2ps 291(%rax,%r14,8), %zmm23, %zmm24
+
+// CHECK: vpermi2ps (%rcx){1to16}, %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x01]
+          vpermi2ps (%rcx){1to16}, %zmm23, %zmm24
+
+// CHECK: vpermi2ps 8128(%rdx), %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x42,0x7f]
+          vpermi2ps 8128(%rdx), %zmm23, %zmm24
+
+// CHECK: vpermi2ps 8192(%rdx), %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x82,0x00,0x20,0x00,0x00]
+          vpermi2ps 8192(%rdx), %zmm23, %zmm24
+
+// CHECK: vpermi2ps -8192(%rdx), %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x42,0x80]
+          vpermi2ps -8192(%rdx), %zmm23, %zmm24
+
+// CHECK: vpermi2ps -8256(%rdx), %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x82,0xc0,0xdf,0xff,0xff]
+          vpermi2ps -8256(%rdx), %zmm23, %zmm24
+
+// CHECK: vpermi2ps 508(%rdx){1to16}, %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x42,0x7f]
+          vpermi2ps 508(%rdx){1to16}, %zmm23, %zmm24
+
+// CHECK: vpermi2ps 512(%rdx){1to16}, %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x82,0x00,0x02,0x00,0x00]
+          vpermi2ps 512(%rdx){1to16}, %zmm23, %zmm24
+
+// CHECK: vpermi2ps -512(%rdx){1to16}, %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x42,0x80]
+          vpermi2ps -512(%rdx){1to16}, %zmm23, %zmm24
+
+// CHECK: vpermi2ps -516(%rdx){1to16}, %zmm23, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x82,0xfc,0xfd,0xff,0xff]
+          vpermi2ps -516(%rdx){1to16}, %zmm23, %zmm24
+
+// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x48,0x77,0xe4]
+          vpermi2pd %zmm20, %zmm5, %zmm20
+
+// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20 {%k3}
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x4b,0x77,0xe4]
+          vpermi2pd %zmm20, %zmm5, %zmm20 {%k3}
+
+// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa2,0xd5,0xcb,0x77,0xe4]
+          vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} {z}
+
+// CHECK: vpermi2pd (%rcx), %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x21]
+          vpermi2pd (%rcx), %zmm5, %zmm20
+
+// CHECK: vpermi2pd 291(%rax,%r14,8), %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x48,0x77,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpermi2pd 291(%rax,%r14,8), %zmm5, %zmm20
+
+// CHECK: vpermi2pd (%rcx){1to8}, %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x21]
+          vpermi2pd (%rcx){1to8}, %zmm5, %zmm20
+
+// CHECK: vpermi2pd 8128(%rdx), %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x62,0x7f]
+          vpermi2pd 8128(%rdx), %zmm5, %zmm20
+
+// CHECK: vpermi2pd 8192(%rdx), %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0xa2,0x00,0x20,0x00,0x00]
+          vpermi2pd 8192(%rdx), %zmm5, %zmm20
+
+// CHECK: vpermi2pd -8192(%rdx), %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x62,0x80]
+          vpermi2pd -8192(%rdx), %zmm5, %zmm20
+
+// CHECK: vpermi2pd -8256(%rdx), %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0xa2,0xc0,0xdf,0xff,0xff]
+          vpermi2pd -8256(%rdx), %zmm5, %zmm20
+
+// CHECK: vpermi2pd 1016(%rdx){1to8}, %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x62,0x7f]
+          vpermi2pd 1016(%rdx){1to8}, %zmm5, %zmm20
+
+// CHECK: vpermi2pd 1024(%rdx){1to8}, %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0x00,0x04,0x00,0x00]
+          vpermi2pd 1024(%rdx){1to8}, %zmm5, %zmm20
+
+// CHECK: vpermi2pd -1024(%rdx){1to8}, %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x62,0x80]
+          vpermi2pd -1024(%rdx){1to8}, %zmm5, %zmm20
+
+// CHECK: vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0xf8,0xfb,0xff,0xff]
+          vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20
+
+// CHECK: vcompresspd %zmm9, (%rcx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x09]
+          vcompresspd %zmm9, (%rcx)
+
+// CHECK: vcompresspd %zmm9, (%rcx) {%k4}
+// CHECK:  encoding: [0x62,0x72,0xfd,0x4c,0x8a,0x09]
+          vcompresspd %zmm9, (%rcx) {%k4}
+
+// CHECK: vcompresspd %zmm9, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x32,0xfd,0x48,0x8a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vcompresspd %zmm9, 291(%rax,%r14,8)
+
+// CHECK: vcompresspd %zmm9, 1016(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x7f]
+          vcompresspd %zmm9, 1016(%rdx)
+
+// CHECK: vcompresspd %zmm9, 1024(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0x00,0x04,0x00,0x00]
+          vcompresspd %zmm9, 1024(%rdx)
+
+// CHECK: vcompresspd %zmm9, -1024(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x80]
+          vcompresspd %zmm9, -1024(%rdx)
+
+// CHECK: vcompresspd %zmm9, -1032(%rdx)
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0xf8,0xfb,0xff,0xff]
+          vcompresspd %zmm9, -1032(%rdx)
+
+// CHECK: vcompresspd %zmm4, %zmm8
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x48,0x8a,0xe0]
+          vcompresspd %zmm4, %zmm8
+
+// CHECK: vcompresspd %zmm4, %zmm8 {%k6}
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x4e,0x8a,0xe0]
+          vcompresspd %zmm4, %zmm8 {%k6}
+
+// CHECK: vcompresspd %zmm4, %zmm8 {%k6} {z}
+// CHECK:  encoding: [0x62,0xd2,0xfd,0xce,0x8a,0xe0]
+          vcompresspd %zmm4, %zmm8 {%k6} {z}
+
+// CHECK: vcompressps %zmm10, (%rcx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x11]
+          vcompressps %zmm10, (%rcx)
+
+// CHECK: vcompressps %zmm10, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x72,0x7d,0x4f,0x8a,0x11]
+          vcompressps %zmm10, (%rcx) {%k7}
+
+// CHECK: vcompressps %zmm10, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x32,0x7d,0x48,0x8a,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vcompressps %zmm10, 291(%rax,%r14,8)
+
+// CHECK: vcompressps %zmm10, 508(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x7f]
+          vcompressps %zmm10, 508(%rdx)
+
+// CHECK: vcompressps %zmm10, 512(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0x00,0x02,0x00,0x00]
+          vcompressps %zmm10, 512(%rdx)
+
+// CHECK: vcompressps %zmm10, -512(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x80]
+          vcompressps %zmm10, -512(%rdx)
+
+// CHECK: vcompressps %zmm10, -516(%rdx)
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0xfc,0xfd,0xff,0xff]
+          vcompressps %zmm10, -516(%rdx)
+
+// CHECK: vcompressps %zmm14, %zmm4
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x8a,0xf4]
+          vcompressps %zmm14, %zmm4
+
+// CHECK: vcompressps %zmm14, %zmm4 {%k2}
+// CHECK:  encoding: [0x62,0x72,0x7d,0x4a,0x8a,0xf4]
+          vcompressps %zmm14, %zmm4 {%k2}
+
+// CHECK: vcompressps %zmm14, %zmm4 {%k2} {z}
+// CHECK:  encoding: [0x62,0x72,0x7d,0xca,0x8a,0xf4]
+          vcompressps %zmm14, %zmm4 {%k2} {z}
+
+// CHECK: vexpandpd (%rcx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x01]
+          vexpandpd (%rcx), %zmm24
+
+// CHECK: vexpandpd (%rcx), %zmm24 {%k4}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x4c,0x88,0x01]
+          vexpandpd (%rcx), %zmm24 {%k4}
+
+// CHECK: vexpandpd (%rcx), %zmm24 {%k4} {z}
+// CHECK:  encoding: [0x62,0x62,0xfd,0xcc,0x88,0x01]
+          vexpandpd (%rcx), %zmm24 {%k4} {z}
+
+// CHECK: vexpandpd 291(%rax,%r14,8), %zmm24
+// CHECK:  encoding: [0x62,0x22,0xfd,0x48,0x88,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vexpandpd 291(%rax,%r14,8), %zmm24
+
+// CHECK: vexpandpd 1016(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x7f]
+          vexpandpd 1016(%rdx), %zmm24
+
+// CHECK: vexpandpd 1024(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0x00,0x04,0x00,0x00]
+          vexpandpd 1024(%rdx), %zmm24
+
+// CHECK: vexpandpd -1024(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x80]
+          vexpandpd -1024(%rdx), %zmm24
+
+// CHECK: vexpandpd -1032(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0xf8,0xfb,0xff,0xff]
+          vexpandpd -1032(%rdx), %zmm24
+
+// CHECK: vexpandpd %zmm15, %zmm23
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x48,0x88,0xff]
+          vexpandpd %zmm15, %zmm23
+
+// CHECK: vexpandpd %zmm15, %zmm23 {%k5}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x4d,0x88,0xff]
+          vexpandpd %zmm15, %zmm23 {%k5}
+
+// CHECK: vexpandpd %zmm15, %zmm23 {%k5} {z}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0xcd,0x88,0xff]
+          vexpandpd %zmm15, %zmm23 {%k5} {z}
+
+// CHECK: vexpandps (%rcx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0x21]
+          vexpandps (%rcx), %zmm4
+
+// CHECK: vexpandps (%rcx), %zmm4 {%k6}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x4e,0x88,0x21]
+          vexpandps (%rcx), %zmm4 {%k6}
+
+// CHECK: vexpandps (%rcx), %zmm4 {%k6} {z}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0xce,0x88,0x21]
+          vexpandps (%rcx), %zmm4 {%k6} {z}
+
+// CHECK: vexpandps 291(%rax,%r14,8), %zmm4
+// CHECK:  encoding: [0x62,0xb2,0x7d,0x48,0x88,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vexpandps 291(%rax,%r14,8), %zmm4
+
+// CHECK: vexpandps 508(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x7f]
+          vexpandps 508(%rdx), %zmm4
+
+// CHECK: vexpandps 512(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0x00,0x02,0x00,0x00]
+          vexpandps 512(%rdx), %zmm4
+
+// CHECK: vexpandps -512(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x80]
+          vexpandps -512(%rdx), %zmm4
+
+// CHECK: vexpandps -516(%rdx), %zmm4
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0xfc,0xfd,0xff,0xff]
+          vexpandps -516(%rdx), %zmm4
+
+// CHECK: vexpandps %zmm9, %zmm14
+// CHECK:  encoding: [0x62,0x52,0x7d,0x48,0x88,0xf1]
+          vexpandps %zmm9, %zmm14
+
+// CHECK: vexpandps %zmm9, %zmm14 {%k2}
+// CHECK:  encoding: [0x62,0x52,0x7d,0x4a,0x88,0xf1]
+          vexpandps %zmm9, %zmm14 {%k2}
+
+// CHECK: vexpandps %zmm9, %zmm14 {%k2} {z}
+// CHECK:  encoding: [0x62,0x52,0x7d,0xca,0x88,0xf1]
+          vexpandps %zmm9, %zmm14 {%k2} {z}
+
+// CHECK: vpabsd %zmm14, %zmm15
+// CHECK:  encoding: [0x62,0x52,0x7d,0x48,0x1e,0xfe]
+          vpabsd %zmm14, %zmm15
+
+// CHECK: vpabsd %zmm14, %zmm15 {%k6}
+// CHECK:  encoding: [0x62,0x52,0x7d,0x4e,0x1e,0xfe]
+          vpabsd %zmm14, %zmm15 {%k6}
+
+// CHECK: vpabsd %zmm14, %zmm15 {%k6} {z}
+// CHECK:  encoding: [0x62,0x52,0x7d,0xce,0x1e,0xfe]
+          vpabsd %zmm14, %zmm15 {%k6} {z}
+
+// CHECK: vpabsd (%rcx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0x39]
+          vpabsd (%rcx), %zmm15
+
+// CHECK: vpabsd 291(%rax,%r14,8), %zmm15
+// CHECK:  encoding: [0x62,0x32,0x7d,0x48,0x1e,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpabsd 291(%rax,%r14,8), %zmm15
+
+// CHECK: vpabsd (%rcx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0x39]
+          vpabsd (%rcx){1to16}, %zmm15
+
+// CHECK: vpabsd 8128(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0x7a,0x7f]
+          vpabsd 8128(%rdx), %zmm15
+
+// CHECK: vpabsd 8192(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0xba,0x00,0x20,0x00,0x00]
+          vpabsd 8192(%rdx), %zmm15
+
+// CHECK: vpabsd -8192(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0x7a,0x80]
+          vpabsd -8192(%rdx), %zmm15
+
+// CHECK: vpabsd -8256(%rdx), %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x1e,0xba,0xc0,0xdf,0xff,0xff]
+          vpabsd -8256(%rdx), %zmm15
+
+// CHECK: vpabsd 508(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0x7a,0x7f]
+          vpabsd 508(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsd 512(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0xba,0x00,0x02,0x00,0x00]
+          vpabsd 512(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsd -512(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0x7a,0x80]
+          vpabsd -512(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsd -516(%rdx){1to16}, %zmm15
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x1e,0xba,0xfc,0xfd,0xff,0xff]
+          vpabsd -516(%rdx){1to16}, %zmm15
+
+// CHECK: vpabsq %zmm24, %zmm5
+// CHECK:  encoding: [0x62,0x92,0xfd,0x48,0x1f,0xe8]
+          vpabsq %zmm24, %zmm5
+
+// CHECK: vpabsq %zmm24, %zmm5 {%k6}
+// CHECK:  encoding: [0x62,0x92,0xfd,0x4e,0x1f,0xe8]
+          vpabsq %zmm24, %zmm5 {%k6}
+
+// CHECK: vpabsq %zmm24, %zmm5 {%k6} {z}
+// CHECK:  encoding: [0x62,0x92,0xfd,0xce,0x1f,0xe8]
+          vpabsq %zmm24, %zmm5 {%k6} {z}
+
+// CHECK: vpabsq (%rcx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x29]
+          vpabsq (%rcx), %zmm5
+
+// CHECK: vpabsq 291(%rax,%r14,8), %zmm5
+// CHECK:  encoding: [0x62,0xb2,0xfd,0x48,0x1f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpabsq 291(%rax,%r14,8), %zmm5
+
+// CHECK: vpabsq (%rcx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x29]
+          vpabsq (%rcx){1to8}, %zmm5
+
+// CHECK: vpabsq 8128(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x6a,0x7f]
+          vpabsq 8128(%rdx), %zmm5
+
+// CHECK: vpabsq 8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xaa,0x00,0x20,0x00,0x00]
+          vpabsq 8192(%rdx), %zmm5
+
+// CHECK: vpabsq -8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x6a,0x80]
+          vpabsq -8192(%rdx), %zmm5
+
+// CHECK: vpabsq -8256(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xaa,0xc0,0xdf,0xff,0xff]
+          vpabsq -8256(%rdx), %zmm5
+
+// CHECK: vpabsq 1016(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x6a,0x7f]
+          vpabsq 1016(%rdx){1to8}, %zmm5
+
+// CHECK: vpabsq 1024(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0x00,0x04,0x00,0x00]
+          vpabsq 1024(%rdx){1to8}, %zmm5
+
+// CHECK: vpabsq -1024(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x6a,0x80]
+          vpabsq -1024(%rdx){1to8}, %zmm5
+
+// CHECK: vpabsq -1032(%rdx){1to8}, %zmm5
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0xf8,0xfb,0xff,0xff]
+          vpabsq -1032(%rdx){1to8}, %zmm5
+
+// CHECK: vpgatherdd 123(%r14,%zmm11,8), %zmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x49,0x90,0x8c,0xde,0x7b,0x00,0x00,0x00]
+          vpgatherdd 123(%r14, %zmm11,8), %zmm17 {%k1}
+
+// CHECK: vpgatherdd 256(%r9,%zmm11), %zmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x49,0x90,0x4c,0x19,0x40]
+          vpgatherdd 256(%r9,%zmm11), %zmm17 {%k1}
+
+// CHECK: vpgatherdd 1024(%rcx,%zmm11,4), %zmm17 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x49,0x90,0x8c,0x99,0x00,0x04,0x00,0x00]
+          vpgatherdd 1024(%rcx, %zmm11,4), %zmm17 {%k1}
+
+// CHECK: vpgatherdq 123(%r14,%ymm14,8), %zmm8 {%k1}
+// CHECK:  encoding: [0x62,0x12,0xfd,0x49,0x90,0x84,0xf6,0x7b,0x00,0x00,0x00]
+          vpgatherdq 123(%r14, %ymm14,8), %zmm8 {%k1}
+
+// CHECK: vpgatherdq 256(%r9,%ymm14), %zmm8 {%k1}
+// CHECK:  encoding: [0x62,0x12,0xfd,0x49,0x90,0x44,0x31,0x20]
+          vpgatherdq 256(%r9, %ymm14), %zmm8 {%k1}
+
+// CHECK: vpgatherdq 1024(%rcx,%ymm14,4), %zmm8 {%k1}
+// CHECK:  encoding: [0x62,0x32,0xfd,0x49,0x90,0x84,0xb1,0x00,0x04,0x00,0x00]
+          vpgatherdq 1024(%rcx, %ymm14,4), %zmm8 {%k1}
+
+// CHECK: vpgatherqd 123(%r14,%zmm17,8), %ymm3 {%k1}
+// CHECK:  encoding: [0x62,0xd2,0x7d,0x41,0x91,0x9c,0xce,0x7b,0x00,0x00,0x00]
+          vpgatherqd 123(%r14, %zmm17,8), %ymm3 {%k1}
+
+// CHECK: vpgatherqd 256(%r9,%zmm17), %ymm3 {%k1}
+// CHECK:  encoding: [0x62,0xd2,0x7d,0x41,0x91,0x5c,0x09,0x40]
+          vpgatherqd 256(%r9,%zmm17), %ymm3 {%k1}
+
+// CHECK: vpgatherqd 1024(%rcx,%zmm17,4), %ymm3 {%k1}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x41,0x91,0x9c,0x89,0x00,0x04,0x00,0x00]
+          vpgatherqd 1024(%rcx, %zmm17,4), %ymm3 {%k1}
+
+// CHECK: vpgatherqq 123(%r14,%zmm21,8), %zmm17 {%k1}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x41,0x91,0x8c,0xee,0x7b,0x00,0x00,0x00]
+          vpgatherqq 123(%r14, %zmm21,8), %zmm17 {%k1}
+
+// CHECK: vpgatherqq 256(%r9,%zmm21), %zmm17 {%k1}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x41,0x91,0x4c,0x29,0x20]
+          vpgatherqq 256(%r9,%zmm21), %zmm17 {%k1}
+
+// CHECK: vpgatherqq 1024(%rcx,%zmm21,4), %zmm17 {%k1}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x41,0x91,0x8c,0xa9,0x00,0x04,0x00,0x00]
+          vpgatherqq 1024(%rcx, %zmm21,4), %zmm17 {%k1}
+
+// CHECK: vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x9c,0xc6,0x7b,0x00,0x00,0x00]
+          vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
+
+// CHECK: vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x9c,0xc6,0x7b,0x00,0x00,0x00]
+          vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
+
+// CHECK: vpscatterdd %zmm19, 256(%r9,%zmm16) {%k1}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x5c,0x01,0x40]
+          vpscatterdd %zmm19, 256(%r9,%zmm16) {%k1}
+
+// CHECK: vpscatterdd %zmm19, 1024(%rcx,%zmm16,4) {%k1}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x41,0xa0,0x9c,0x81,0x00,0x04,0x00,0x00]
+          vpscatterdd %zmm19, 1024(%rcx,%zmm16,4) {%k1}
+
+// CHECK: vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x49,0xa0,0xac,0xf6,0x7b,0x00,0x00,0x00]
+          vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
+
+// CHECK: vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x49,0xa0,0xac,0xf6,0x7b,0x00,0x00,0x00]
+          vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
+
+// CHECK: vpscatterdq %zmm5, 256(%r9,%ymm6) {%k1}
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x49,0xa0,0x6c,0x31,0x20]
+          vpscatterdq %zmm5, 256(%r9,%ymm6) {%k1}
+
+// CHECK: vpscatterdq %zmm5, 1024(%rcx,%ymm6,4) {%k1}
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x49,0xa0,0xac,0xb1,0x00,0x04,0x00,0x00]
+          vpscatterdq %zmm5, 1024(%rcx,%ymm6,4) {%k1}
+
+// CHECK: vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x49,0xa1,0xa4,0xd6,0x7b,0x00,0x00,0x00]
+          vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
+
+// CHECK: vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x49,0xa1,0xa4,0xd6,0x7b,0x00,0x00,0x00]
+          vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
+
+// CHECK: vpscatterqd %ymm20, 256(%r9,%zmm2) {%k1}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x49,0xa1,0x64,0x11,0x40]
+          vpscatterqd %ymm20, 256(%r9,%zmm2) {%k1}
+
+// CHECK: vpscatterqd %ymm20, 1024(%rcx,%zmm2,4) {%k1}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x49,0xa1,0xa4,0x91,0x00,0x04,0x00,0x00]
+          vpscatterqd %ymm20, 1024(%rcx,%zmm2,4) {%k1}
+
+// CHECK: vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
+// CHECK:  encoding: [0x62,0x52,0xfd,0x41,0xa1,0xb4,0xe6,0x7b,0x00,0x00,0x00]
+          vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
+
+// CHECK: vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
+// CHECK:  encoding: [0x62,0x52,0xfd,0x41,0xa1,0xb4,0xe6,0x7b,0x00,0x00,0x00]
+          vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
+
+// CHECK: vpscatterqq %zmm14, 256(%r9,%zmm20) {%k1}
+// CHECK:  encoding: [0x62,0x52,0xfd,0x41,0xa1,0x74,0x21,0x20]
+          vpscatterqq %zmm14, 256(%r9,%zmm20) {%k1}
+
+// CHECK: vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1}
+// CHECK:  encoding: [0x62,0x72,0xfd,0x41,0xa1,0xb4,0xa1,0x00,0x04,0x00,0x00]
+          vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1}
+// CHECK: vscalefpd %zmm28, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x02,0xad,0x40,0x2c,0xd4]
+          vscalefpd %zmm28, %zmm26, %zmm26
+
+// CHECK: vscalefpd %zmm28, %zmm26, %zmm26 {%k5}
+// CHECK:  encoding: [0x62,0x02,0xad,0x45,0x2c,0xd4]
+          vscalefpd %zmm28, %zmm26, %zmm26 {%k5}
+
+// CHECK: vscalefpd %zmm28, %zmm26, %zmm26 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0xad,0xc5,0x2c,0xd4]
+          vscalefpd %zmm28, %zmm26, %zmm26 {%k5} {z}
+
+// CHECK: vscalefpd {rn-sae}, %zmm28, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x02,0xad,0x10,0x2c,0xd4]
+          vscalefpd {rn-sae}, %zmm28, %zmm26, %zmm26
+
+// CHECK: vscalefpd {ru-sae}, %zmm28, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x02,0xad,0x50,0x2c,0xd4]
+          vscalefpd {ru-sae}, %zmm28, %zmm26, %zmm26
+
+// CHECK: vscalefpd {rd-sae}, %zmm28, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x02,0xad,0x30,0x2c,0xd4]
+          vscalefpd {rd-sae}, %zmm28, %zmm26, %zmm26
+
+// CHECK: vscalefpd {rz-sae}, %zmm28, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x02,0xad,0x70,0x2c,0xd4]
+          vscalefpd {rz-sae}, %zmm28, %zmm26, %zmm26
+
+// CHECK: vscalefpd (%rcx), %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x40,0x2c,0x11]
+          vscalefpd (%rcx), %zmm26, %zmm26
+
+// CHECK: vscalefpd 291(%rax,%r14,8), %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xad,0x40,0x2c,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vscalefpd 291(%rax,%r14,8), %zmm26, %zmm26
+
+// CHECK: vscalefpd (%rcx){1to8}, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x50,0x2c,0x11]
+          vscalefpd (%rcx){1to8}, %zmm26, %zmm26
+
+// CHECK: vscalefpd 8128(%rdx), %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x40,0x2c,0x52,0x7f]
+          vscalefpd 8128(%rdx), %zmm26, %zmm26
+
+// CHECK: vscalefpd 8192(%rdx), %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x40,0x2c,0x92,0x00,0x20,0x00,0x00]
+          vscalefpd 8192(%rdx), %zmm26, %zmm26
+
+// CHECK: vscalefpd -8192(%rdx), %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x40,0x2c,0x52,0x80]
+          vscalefpd -8192(%rdx), %zmm26, %zmm26
+
+// CHECK: vscalefpd -8256(%rdx), %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x40,0x2c,0x92,0xc0,0xdf,0xff,0xff]
+          vscalefpd -8256(%rdx), %zmm26, %zmm26
+
+// CHECK: vscalefpd 1016(%rdx){1to8}, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x50,0x2c,0x52,0x7f]
+          vscalefpd 1016(%rdx){1to8}, %zmm26, %zmm26
+
+// CHECK: vscalefpd 1024(%rdx){1to8}, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x50,0x2c,0x92,0x00,0x04,0x00,0x00]
+          vscalefpd 1024(%rdx){1to8}, %zmm26, %zmm26
+
+// CHECK: vscalefpd -1024(%rdx){1to8}, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x50,0x2c,0x52,0x80]
+          vscalefpd -1024(%rdx){1to8}, %zmm26, %zmm26
+
+// CHECK: vscalefpd -1032(%rdx){1to8}, %zmm26, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x50,0x2c,0x92,0xf8,0xfb,0xff,0xff]
+          vscalefpd -1032(%rdx){1to8}, %zmm26, %zmm26
+
+// CHECK: vscalefps %zmm18, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x48,0x2c,0xda]
+          vscalefps %zmm18, %zmm6, %zmm19
+
+// CHECK: vscalefps %zmm18, %zmm6, %zmm19 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x4e,0x2c,0xda]
+          vscalefps %zmm18, %zmm6, %zmm19 {%k6}
+
+// CHECK: vscalefps %zmm18, %zmm6, %zmm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0x4d,0xce,0x2c,0xda]
+          vscalefps %zmm18, %zmm6, %zmm19 {%k6} {z}
+
+// CHECK: vscalefps {rn-sae}, %zmm18, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x18,0x2c,0xda]
+          vscalefps {rn-sae}, %zmm18, %zmm6, %zmm19
+
+// CHECK: vscalefps {ru-sae}, %zmm18, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x58,0x2c,0xda]
+          vscalefps {ru-sae}, %zmm18, %zmm6, %zmm19
+
+// CHECK: vscalefps {rd-sae}, %zmm18, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x38,0x2c,0xda]
+          vscalefps {rd-sae}, %zmm18, %zmm6, %zmm19
+
+// CHECK: vscalefps {rz-sae}, %zmm18, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x78,0x2c,0xda]
+          vscalefps {rz-sae}, %zmm18, %zmm6, %zmm19
+
+// CHECK: vscalefps (%rcx), %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x19]
+          vscalefps (%rcx), %zmm6, %zmm19
+
+// CHECK: vscalefps 291(%rax,%r14,8), %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x48,0x2c,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vscalefps 291(%rax,%r14,8), %zmm6, %zmm19
+
+// CHECK: vscalefps (%rcx){1to16}, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x19]
+          vscalefps (%rcx){1to16}, %zmm6, %zmm19
+
+// CHECK: vscalefps 8128(%rdx), %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x5a,0x7f]
+          vscalefps 8128(%rdx), %zmm6, %zmm19
+
+// CHECK: vscalefps 8192(%rdx), %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x9a,0x00,0x20,0x00,0x00]
+          vscalefps 8192(%rdx), %zmm6, %zmm19
+
+// CHECK: vscalefps -8192(%rdx), %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x5a,0x80]
+          vscalefps -8192(%rdx), %zmm6, %zmm19
+
+// CHECK: vscalefps -8256(%rdx), %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x48,0x2c,0x9a,0xc0,0xdf,0xff,0xff]
+          vscalefps -8256(%rdx), %zmm6, %zmm19
+
+// CHECK: vscalefps 508(%rdx){1to16}, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x5a,0x7f]
+          vscalefps 508(%rdx){1to16}, %zmm6, %zmm19
+
+// CHECK: vscalefps 512(%rdx){1to16}, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x9a,0x00,0x02,0x00,0x00]
+          vscalefps 512(%rdx){1to16}, %zmm6, %zmm19
+
+// CHECK: vscalefps -512(%rdx){1to16}, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x5a,0x80]
+          vscalefps -512(%rdx){1to16}, %zmm6, %zmm19
+
+// CHECK: vscalefps -516(%rdx){1to16}, %zmm6, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x58,0x2c,0x9a,0xfc,0xfd,0xff,0xff]
+          vscalefps -516(%rdx){1to16}, %zmm6, %zmm19
+
+// CHECK: vfmadd132ss %xmm22, %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x75,0x00,0x99,0xf6]
+          vfmadd132ss %xmm22, %xmm17, %xmm30
+
+// CHECK: vfmadd132ss %xmm22, %xmm17, %xmm30 {%k3}
+// CHECK:  encoding: [0x62,0x22,0x75,0x03,0x99,0xf6]
+          vfmadd132ss %xmm22, %xmm17, %xmm30 {%k3}
+
+// CHECK: vfmadd132ss %xmm22, %xmm17, %xmm30 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0x75,0x83,0x99,0xf6]
+          vfmadd132ss %xmm22, %xmm17, %xmm30 {%k3} {z}
+
+// CHECK: vfmadd132ss {rn-sae}, %xmm22, %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x75,0x10,0x99,0xf6]
+          vfmadd132ss {rn-sae}, %xmm22, %xmm17, %xmm30
+
+// CHECK: vfmadd132ss {ru-sae}, %xmm22, %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x75,0x50,0x99,0xf6]
+          vfmadd132ss {ru-sae}, %xmm22, %xmm17, %xmm30
+
+// CHECK: vfmadd132ss {rd-sae}, %xmm22, %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x75,0x30,0x99,0xf6]
+          vfmadd132ss {rd-sae}, %xmm22, %xmm17, %xmm30
+
+// CHECK: vfmadd132ss {rz-sae}, %xmm22, %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x75,0x70,0x99,0xf6]
+          vfmadd132ss {rz-sae}, %xmm22, %xmm17, %xmm30
+
+// CHECK: vfmadd132ss (%rcx), %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x99,0x31]
+          vfmadd132ss (%rcx), %xmm17, %xmm30
+
+// CHECK: vfmadd132ss 291(%rax,%r14,8), %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x75,0x00,0x99,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132ss 291(%rax,%r14,8), %xmm17, %xmm30
+
+// CHECK: vfmadd132ss 508(%rdx), %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x99,0x72,0x7f]
+          vfmadd132ss 508(%rdx), %xmm17, %xmm30
+
+// CHECK: vfmadd132ss 512(%rdx), %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x99,0xb2,0x00,0x02,0x00,0x00]
+          vfmadd132ss 512(%rdx), %xmm17, %xmm30
+
+// CHECK: vfmadd132ss -512(%rdx), %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x99,0x72,0x80]
+          vfmadd132ss -512(%rdx), %xmm17, %xmm30
+
+// CHECK: vfmadd132ss -516(%rdx), %xmm17, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x99,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmadd132ss -516(%rdx), %xmm17, %xmm30
+
+// CHECK: vfmadd132sd %xmm3, %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x99,0xe3]
+          vfmadd132sd %xmm3, %xmm17, %xmm28
+
+// CHECK: vfmadd132sd %xmm3, %xmm17, %xmm28 {%k2}
+// CHECK:  encoding: [0x62,0x62,0xf5,0x02,0x99,0xe3]
+          vfmadd132sd %xmm3, %xmm17, %xmm28 {%k2}
+
+// CHECK: vfmadd132sd %xmm3, %xmm17, %xmm28 {%k2} {z}
+// CHECK:  encoding: [0x62,0x62,0xf5,0x82,0x99,0xe3]
+          vfmadd132sd %xmm3, %xmm17, %xmm28 {%k2} {z}
+
+// CHECK: vfmadd132sd {rn-sae}, %xmm3, %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x99,0xe3]
+          vfmadd132sd {rn-sae}, %xmm3, %xmm17, %xmm28
+
+// CHECK: vfmadd132sd {ru-sae}, %xmm3, %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x50,0x99,0xe3]
+          vfmadd132sd {ru-sae}, %xmm3, %xmm17, %xmm28
+
+// CHECK: vfmadd132sd {rd-sae}, %xmm3, %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x99,0xe3]
+          vfmadd132sd {rd-sae}, %xmm3, %xmm17, %xmm28
+
+// CHECK: vfmadd132sd {rz-sae}, %xmm3, %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x70,0x99,0xe3]
+          vfmadd132sd {rz-sae}, %xmm3, %xmm17, %xmm28
+
+// CHECK: vfmadd132sd (%rcx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x99,0x21]
+          vfmadd132sd (%rcx), %xmm17, %xmm28
+
+// CHECK: vfmadd132sd 291(%rax,%r14,8), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x22,0xf5,0x00,0x99,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132sd 291(%rax,%r14,8), %xmm17, %xmm28
+
+// CHECK: vfmadd132sd 1016(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x99,0x62,0x7f]
+          vfmadd132sd 1016(%rdx), %xmm17, %xmm28
+
+// CHECK: vfmadd132sd 1024(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x99,0xa2,0x00,0x04,0x00,0x00]
+          vfmadd132sd 1024(%rdx), %xmm17, %xmm28
+
+// CHECK: vfmadd132sd -1024(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x99,0x62,0x80]
+          vfmadd132sd -1024(%rdx), %xmm17, %xmm28
+
+// CHECK: vfmadd132sd -1032(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x99,0xa2,0xf8,0xfb,0xff,0xff]
+          vfmadd132sd -1032(%rdx), %xmm17, %xmm28
+
+// CHECK: vfmadd213ss %xmm16, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x00,0xa9,0xf0]
+          vfmadd213ss %xmm16, %xmm22, %xmm30
+
+// CHECK: vfmadd213ss %xmm16, %xmm22, %xmm30 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x4d,0x01,0xa9,0xf0]
+          vfmadd213ss %xmm16, %xmm22, %xmm30 {%k1}
+
+// CHECK: vfmadd213ss %xmm16, %xmm22, %xmm30 {%k1} {z}
+// CHECK:  encoding: [0x62,0x22,0x4d,0x81,0xa9,0xf0]
+          vfmadd213ss %xmm16, %xmm22, %xmm30 {%k1} {z}
+
+// CHECK: vfmadd213ss {rn-sae}, %xmm16, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x10,0xa9,0xf0]
+          vfmadd213ss {rn-sae}, %xmm16, %xmm22, %xmm30
+
+// CHECK: vfmadd213ss {ru-sae}, %xmm16, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x50,0xa9,0xf0]
+          vfmadd213ss {ru-sae}, %xmm16, %xmm22, %xmm30
+
+// CHECK: vfmadd213ss {rd-sae}, %xmm16, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x30,0xa9,0xf0]
+          vfmadd213ss {rd-sae}, %xmm16, %xmm22, %xmm30
+
+// CHECK: vfmadd213ss {rz-sae}, %xmm16, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x70,0xa9,0xf0]
+          vfmadd213ss {rz-sae}, %xmm16, %xmm22, %xmm30
+
+// CHECK: vfmadd213ss (%rcx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0xa9,0x31]
+          vfmadd213ss (%rcx), %xmm22, %xmm30
+
+// CHECK: vfmadd213ss 291(%rax,%r14,8), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x00,0xa9,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213ss 291(%rax,%r14,8), %xmm22, %xmm30
+
+// CHECK: vfmadd213ss 508(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0xa9,0x72,0x7f]
+          vfmadd213ss 508(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmadd213ss 512(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0xa9,0xb2,0x00,0x02,0x00,0x00]
+          vfmadd213ss 512(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmadd213ss -512(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0xa9,0x72,0x80]
+          vfmadd213ss -512(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmadd213ss -516(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0xa9,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmadd213ss -516(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmadd213sd %xmm13, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x42,0xad,0x00,0xa9,0xc5]
+          vfmadd213sd %xmm13, %xmm26, %xmm24
+
+// CHECK: vfmadd213sd %xmm13, %xmm26, %xmm24 {%k3}
+// CHECK:  encoding: [0x62,0x42,0xad,0x03,0xa9,0xc5]
+          vfmadd213sd %xmm13, %xmm26, %xmm24 {%k3}
+
+// CHECK: vfmadd213sd %xmm13, %xmm26, %xmm24 {%k3} {z}
+// CHECK:  encoding: [0x62,0x42,0xad,0x83,0xa9,0xc5]
+          vfmadd213sd %xmm13, %xmm26, %xmm24 {%k3} {z}
+
+// CHECK: vfmadd213sd {rn-sae}, %xmm13, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x42,0xad,0x10,0xa9,0xc5]
+          vfmadd213sd {rn-sae}, %xmm13, %xmm26, %xmm24
+
+// CHECK: vfmadd213sd {ru-sae}, %xmm13, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x42,0xad,0x50,0xa9,0xc5]
+          vfmadd213sd {ru-sae}, %xmm13, %xmm26, %xmm24
+
+// CHECK: vfmadd213sd {rd-sae}, %xmm13, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x42,0xad,0x30,0xa9,0xc5]
+          vfmadd213sd {rd-sae}, %xmm13, %xmm26, %xmm24
+
+// CHECK: vfmadd213sd {rz-sae}, %xmm13, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x42,0xad,0x70,0xa9,0xc5]
+          vfmadd213sd {rz-sae}, %xmm13, %xmm26, %xmm24
+
+// CHECK: vfmadd213sd (%rcx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0xa9,0x01]
+          vfmadd213sd (%rcx), %xmm26, %xmm24
+
+// CHECK: vfmadd213sd 291(%rax,%r14,8), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x22,0xad,0x00,0xa9,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213sd 291(%rax,%r14,8), %xmm26, %xmm24
+
+// CHECK: vfmadd213sd 1016(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0xa9,0x42,0x7f]
+          vfmadd213sd 1016(%rdx), %xmm26, %xmm24
+
+// CHECK: vfmadd213sd 1024(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0xa9,0x82,0x00,0x04,0x00,0x00]
+          vfmadd213sd 1024(%rdx), %xmm26, %xmm24
+
+// CHECK: vfmadd213sd -1024(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0xa9,0x42,0x80]
+          vfmadd213sd -1024(%rdx), %xmm26, %xmm24
+
+// CHECK: vfmadd213sd -1032(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0xa9,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd213sd -1032(%rdx), %xmm26, %xmm24
+
+// CHECK: vfmadd231ss %xmm10, %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x42,0x05,0x08,0xb9,0xea]
+          vfmadd231ss %xmm10, %xmm15, %xmm29
+
+// CHECK: vfmadd231ss %xmm10, %xmm15, %xmm29 {%k4}
+// CHECK:  encoding: [0x62,0x42,0x05,0x0c,0xb9,0xea]
+          vfmadd231ss %xmm10, %xmm15, %xmm29 {%k4}
+
+// CHECK: vfmadd231ss %xmm10, %xmm15, %xmm29 {%k4} {z}
+// CHECK:  encoding: [0x62,0x42,0x05,0x8c,0xb9,0xea]
+          vfmadd231ss %xmm10, %xmm15, %xmm29 {%k4} {z}
+
+// CHECK: vfmadd231ss {rn-sae}, %xmm10, %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x42,0x05,0x18,0xb9,0xea]
+          vfmadd231ss {rn-sae}, %xmm10, %xmm15, %xmm29
+
+// CHECK: vfmadd231ss {ru-sae}, %xmm10, %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x42,0x05,0x58,0xb9,0xea]
+          vfmadd231ss {ru-sae}, %xmm10, %xmm15, %xmm29
+
+// CHECK: vfmadd231ss {rd-sae}, %xmm10, %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x42,0x05,0x38,0xb9,0xea]
+          vfmadd231ss {rd-sae}, %xmm10, %xmm15, %xmm29
+
+// CHECK: vfmadd231ss {rz-sae}, %xmm10, %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x42,0x05,0x78,0xb9,0xea]
+          vfmadd231ss {rz-sae}, %xmm10, %xmm15, %xmm29
+
+// CHECK: vfmadd231ss (%rcx), %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x05,0x08,0xb9,0x29]
+          vfmadd231ss (%rcx), %xmm15, %xmm29
+
+// CHECK: vfmadd231ss 291(%rax,%r14,8), %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x05,0x08,0xb9,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231ss 291(%rax,%r14,8), %xmm15, %xmm29
+
+// CHECK: vfmadd231ss 508(%rdx), %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x05,0x08,0xb9,0x6a,0x7f]
+          vfmadd231ss 508(%rdx), %xmm15, %xmm29
+
+// CHECK: vfmadd231ss 512(%rdx), %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x05,0x08,0xb9,0xaa,0x00,0x02,0x00,0x00]
+          vfmadd231ss 512(%rdx), %xmm15, %xmm29
+
+// CHECK: vfmadd231ss -512(%rdx), %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x05,0x08,0xb9,0x6a,0x80]
+          vfmadd231ss -512(%rdx), %xmm15, %xmm29
+
+// CHECK: vfmadd231ss -516(%rdx), %xmm15, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x05,0x08,0xb9,0xaa,0xfc,0xfd,0xff,0xff]
+          vfmadd231ss -516(%rdx), %xmm15, %xmm29
+
+// CHECK: vfmadd231sd %xmm14, %xmm1, %xmm4
+// CHECK:  encoding: [0xc4,0xc2,0xf1,0xb9,0xe6]
+          vfmadd231sd %xmm14, %xmm1, %xmm4
+
+// CHECK: vfmadd231sd %xmm14, %xmm1, %xmm4 {%k1}
+// CHECK:  encoding: [0x62,0xd2,0xf5,0x09,0xb9,0xe6]
+          vfmadd231sd %xmm14, %xmm1, %xmm4 {%k1}
+
+// CHECK: vfmadd231sd %xmm14, %xmm1, %xmm4 {%k1} {z}
+// CHECK:  encoding: [0x62,0xd2,0xf5,0x89,0xb9,0xe6]
+          vfmadd231sd %xmm14, %xmm1, %xmm4 {%k1} {z}
+
+// CHECK: vfmadd231sd {rn-sae}, %xmm14, %xmm1, %xmm4
+// CHECK:  encoding: [0x62,0xd2,0xf5,0x18,0xb9,0xe6]
+          vfmadd231sd {rn-sae}, %xmm14, %xmm1, %xmm4
+
+// CHECK: vfmadd231sd {ru-sae}, %xmm14, %xmm1, %xmm4
+// CHECK:  encoding: [0x62,0xd2,0xf5,0x58,0xb9,0xe6]
+          vfmadd231sd {ru-sae}, %xmm14, %xmm1, %xmm4
+
+// CHECK: vfmadd231sd {rd-sae}, %xmm14, %xmm1, %xmm4
+// CHECK:  encoding: [0x62,0xd2,0xf5,0x38,0xb9,0xe6]
+          vfmadd231sd {rd-sae}, %xmm14, %xmm1, %xmm4
+
+// CHECK: vfmadd231sd {rz-sae}, %xmm14, %xmm1, %xmm4
+// CHECK:  encoding: [0x62,0xd2,0xf5,0x78,0xb9,0xe6]
+          vfmadd231sd {rz-sae}, %xmm14, %xmm1, %xmm4
+
+// CHECK: vfmadd231sd   (%rcx), %xmm1, %xmm24 
+// CHECK: encoding: [0x62,0x62,0xf5,0x08,0xb9,0x01]
+          vfmadd231sd   (%rcx), %xmm1, %xmm24 
+
+// CHECK: vfmadd231sd   291(%rax,%r14,8), %xmm1, %xmm24 
+// CHECK: encoding: [0x62,0x22,0xf5,0x08,0xb9,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231sd   291(%rax,%r14,8), %xmm1, %xmm24 
+
+// CHECK: vfmadd231sd   1016(%rdx), %xmm1, %xmm24 
+// CHECK: encoding: [0x62,0x62,0xf5,0x08,0xb9,0x42,0x7f]
+          vfmadd231sd   1016(%rdx), %xmm1, %xmm24 
+
+// CHECK: vfmadd231sd   1024(%rdx), %xmm1, %xmm24 
+// CHECK: encoding: [0x62,0x62,0xf5,0x08,0xb9,0x82,0x00,0x04,0x00,0x00]
+          vfmadd231sd   1024(%rdx), %xmm1, %xmm24 
+
+// CHECK: vfmadd231sd   -1024(%rdx), %xmm1, %xmm24 
+// CHECK: encoding: [0x62,0x62,0xf5,0x08,0xb9,0x42,0x80]
+          vfmadd231sd   -1024(%rdx), %xmm1, %xmm24 
+
+// CHECK: vfmadd231sd   -1032(%rdx), %xmm1, %xmm24 
+// CHECK: encoding: [0x62,0x62,0xf5,0x08,0xb9,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd231sd   -1032(%rdx), %xmm1, %xmm24 
+
+// CHECK: vfmsub132ss %xmm27, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x4d,0x00,0x9b,0xf3]
+          vfmsub132ss %xmm27, %xmm22, %xmm30
+
+// CHECK: vfmsub132ss %xmm27, %xmm22, %xmm30 {%k3}
+// CHECK:  encoding: [0x62,0x02,0x4d,0x03,0x9b,0xf3]
+          vfmsub132ss %xmm27, %xmm22, %xmm30 {%k3}
+
+// CHECK: vfmsub132ss %xmm27, %xmm22, %xmm30 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0x4d,0x83,0x9b,0xf3]
+          vfmsub132ss %xmm27, %xmm22, %xmm30 {%k3} {z}
+
+// CHECK: vfmsub132ss {rn-sae}, %xmm27, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x4d,0x10,0x9b,0xf3]
+          vfmsub132ss {rn-sae}, %xmm27, %xmm22, %xmm30
+
+// CHECK: vfmsub132ss {ru-sae}, %xmm27, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x4d,0x50,0x9b,0xf3]
+          vfmsub132ss {ru-sae}, %xmm27, %xmm22, %xmm30
+
+// CHECK: vfmsub132ss {rd-sae}, %xmm27, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x4d,0x30,0x9b,0xf3]
+          vfmsub132ss {rd-sae}, %xmm27, %xmm22, %xmm30
+
+// CHECK: vfmsub132ss {rz-sae}, %xmm27, %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x4d,0x70,0x9b,0xf3]
+          vfmsub132ss {rz-sae}, %xmm27, %xmm22, %xmm30
+
+// CHECK: vfmsub132ss (%rcx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0x9b,0x31]
+          vfmsub132ss (%rcx), %xmm22, %xmm30
+
+// CHECK: vfmsub132ss 291(%rax,%r14,8), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x00,0x9b,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132ss 291(%rax,%r14,8), %xmm22, %xmm30
+
+// CHECK: vfmsub132ss 508(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0x9b,0x72,0x7f]
+          vfmsub132ss 508(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmsub132ss 512(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0x9b,0xb2,0x00,0x02,0x00,0x00]
+          vfmsub132ss 512(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmsub132ss -512(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0x9b,0x72,0x80]
+          vfmsub132ss -512(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmsub132ss -516(%rdx), %xmm22, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x00,0x9b,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmsub132ss -516(%rdx), %xmm22, %xmm30
+
+// CHECK: vfmsub132sd %xmm27, %xmm8, %xmm12
+// CHECK:  encoding: [0x62,0x12,0xbd,0x08,0x9b,0xe3]
+          vfmsub132sd %xmm27, %xmm8, %xmm12
+
+// CHECK: vfmsub132sd %xmm27, %xmm8, %xmm12 {%k3}
+// CHECK:  encoding: [0x62,0x12,0xbd,0x0b,0x9b,0xe3]
+          vfmsub132sd %xmm27, %xmm8, %xmm12 {%k3}
+
+// CHECK: vfmsub132sd %xmm27, %xmm8, %xmm12 {%k3} {z}
+// CHECK:  encoding: [0x62,0x12,0xbd,0x8b,0x9b,0xe3]
+          vfmsub132sd %xmm27, %xmm8, %xmm12 {%k3} {z}
+
+// CHECK: vfmsub132sd {rn-sae}, %xmm27, %xmm8, %xmm12
+// CHECK:  encoding: [0x62,0x12,0xbd,0x18,0x9b,0xe3]
+          vfmsub132sd {rn-sae}, %xmm27, %xmm8, %xmm12
+
+// CHECK: vfmsub132sd {ru-sae}, %xmm27, %xmm8, %xmm12
+// CHECK:  encoding: [0x62,0x12,0xbd,0x58,0x9b,0xe3]
+          vfmsub132sd {ru-sae}, %xmm27, %xmm8, %xmm12
+
+// CHECK: vfmsub132sd {rd-sae}, %xmm27, %xmm8, %xmm12
+// CHECK:  encoding: [0x62,0x12,0xbd,0x38,0x9b,0xe3]
+          vfmsub132sd {rd-sae}, %xmm27, %xmm8, %xmm12
+
+// CHECK: vfmsub132sd {rz-sae}, %xmm27, %xmm8, %xmm12
+// CHECK:  encoding: [0x62,0x12,0xbd,0x78,0x9b,0xe3]
+          vfmsub132sd {rz-sae}, %xmm27, %xmm8, %xmm12
+
+// CHECK: vfmsub132sd   (%rcx), %xmm8, %xmm22 
+// CHECK: encoding: [0x62,0xe2,0xbd,0x08,0x9b,0x31]
+          vfmsub132sd   (%rcx), %xmm8, %xmm22 
+
+// CHECK: vfmsub132sd   291(%rax,%r14,8), %xmm8, %xmm22 
+// CHECK: encoding: [0x62,0xa2,0xbd,0x08,0x9b,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132sd   291(%rax,%r14,8), %xmm8, %xmm22 
+
+// CHECK: vfmsub132sd   1016(%rdx), %xmm8, %xmm22 
+// CHECK: encoding: [0x62,0xe2,0xbd,0x08,0x9b,0x72,0x7f]
+          vfmsub132sd   1016(%rdx), %xmm8, %xmm22 
+
+// CHECK: vfmsub132sd   1024(%rdx), %xmm8, %xmm22 
+// CHECK: encoding: [0x62,0xe2,0xbd,0x08,0x9b,0xb2,0x00,0x04,0x00,0x00]
+          vfmsub132sd   1024(%rdx), %xmm8, %xmm22 
+
+// CHECK: vfmsub132sd   -1024(%rdx), %xmm8, %xmm22 
+// CHECK: encoding: [0x62,0xe2,0xbd,0x08,0x9b,0x72,0x80]
+          vfmsub132sd   -1024(%rdx), %xmm8, %xmm22 
+
+// CHECK: vfmsub132sd   -1032(%rdx), %xmm8, %xmm22 
+// CHECK: encoding: [0x62,0xe2,0xbd,0x08,0x9b,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmsub132sd   -1032(%rdx), %xmm8, %xmm22 
+
+// CHECK: vfmsub213ss %xmm26, %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x15,0x08,0xab,0xf2]
+          vfmsub213ss %xmm26, %xmm13, %xmm30
+
+// CHECK: vfmsub213ss %xmm26, %xmm13, %xmm30 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x15,0x09,0xab,0xf2]
+          vfmsub213ss %xmm26, %xmm13, %xmm30 {%k1}
+
+// CHECK: vfmsub213ss %xmm26, %xmm13, %xmm30 {%k1} {z}
+// CHECK:  encoding: [0x62,0x02,0x15,0x89,0xab,0xf2]
+          vfmsub213ss %xmm26, %xmm13, %xmm30 {%k1} {z}
+
+// CHECK: vfmsub213ss {rn-sae}, %xmm26, %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x15,0x18,0xab,0xf2]
+          vfmsub213ss {rn-sae}, %xmm26, %xmm13, %xmm30
+
+// CHECK: vfmsub213ss {ru-sae}, %xmm26, %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x15,0x58,0xab,0xf2]
+          vfmsub213ss {ru-sae}, %xmm26, %xmm13, %xmm30
+
+// CHECK: vfmsub213ss {rd-sae}, %xmm26, %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x15,0x38,0xab,0xf2]
+          vfmsub213ss {rd-sae}, %xmm26, %xmm13, %xmm30
+
+// CHECK: vfmsub213ss {rz-sae}, %xmm26, %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x15,0x78,0xab,0xf2]
+          vfmsub213ss {rz-sae}, %xmm26, %xmm13, %xmm30
+
+// CHECK: vfmsub213ss (%rcx), %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xab,0x31]
+          vfmsub213ss (%rcx), %xmm13, %xmm30
+
+// CHECK: vfmsub213ss 291(%rax,%r14,8), %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x15,0x08,0xab,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213ss 291(%rax,%r14,8), %xmm13, %xmm30
+
+// CHECK: vfmsub213ss 508(%rdx), %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xab,0x72,0x7f]
+          vfmsub213ss 508(%rdx), %xmm13, %xmm30
+
+// CHECK: vfmsub213ss 512(%rdx), %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xab,0xb2,0x00,0x02,0x00,0x00]
+          vfmsub213ss 512(%rdx), %xmm13, %xmm30
+
+// CHECK: vfmsub213ss -512(%rdx), %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xab,0x72,0x80]
+          vfmsub213ss -512(%rdx), %xmm13, %xmm30
+
+// CHECK: vfmsub213ss -516(%rdx), %xmm13, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xab,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmsub213ss -516(%rdx), %xmm13, %xmm30
+
+// CHECK: vfmsub213sd   %xmm12, %xmm8, %xmm26 
+// CHECK: encoding: [0x62,0x42,0xbd,0x08,0xab,0xd4]
+          vfmsub213sd   %xmm12, %xmm8, %xmm26 
+
+// CHECK: vfmsub213sd %xmm12, %xmm8, %xmm6 {%k1}
+// CHECK:  encoding: [0x62,0xd2,0xbd,0x09,0xab,0xf4]
+          vfmsub213sd %xmm12, %xmm8, %xmm6 {%k1}
+
+// CHECK: vfmsub213sd %xmm12, %xmm8, %xmm6 {%k1} {z}
+// CHECK:  encoding: [0x62,0xd2,0xbd,0x89,0xab,0xf4]
+          vfmsub213sd %xmm12, %xmm8, %xmm6 {%k1} {z}
+
+// CHECK: vfmsub213sd {rn-sae}, %xmm12, %xmm8, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xbd,0x18,0xab,0xf4]
+          vfmsub213sd {rn-sae}, %xmm12, %xmm8, %xmm6
+
+// CHECK: vfmsub213sd {ru-sae}, %xmm12, %xmm8, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xbd,0x58,0xab,0xf4]
+          vfmsub213sd {ru-sae}, %xmm12, %xmm8, %xmm6
+
+// CHECK: vfmsub213sd {rd-sae}, %xmm12, %xmm8, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xbd,0x38,0xab,0xf4]
+          vfmsub213sd {rd-sae}, %xmm12, %xmm8, %xmm6
+
+// CHECK: vfmsub213sd {rz-sae}, %xmm12, %xmm8, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xbd,0x78,0xab,0xf4]
+          vfmsub213sd {rz-sae}, %xmm12, %xmm8, %xmm6
+
+// CHECK: vfmsub213sd   (%rcx), %xmm8, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xbd,0x08,0xab,0x11]
+          vfmsub213sd   (%rcx), %xmm8, %xmm26 
+
+// CHECK: vfmsub213sd   291(%rax,%r14,8), %xmm8, %xmm26 
+// CHECK: encoding: [0x62,0x22,0xbd,0x08,0xab,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213sd   291(%rax,%r14,8), %xmm8, %xmm26 
+
+// CHECK: vfmsub213sd   1016(%rdx), %xmm8, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xbd,0x08,0xab,0x52,0x7f]
+          vfmsub213sd   1016(%rdx), %xmm8, %xmm26 
+
+// CHECK: vfmsub213sd   1024(%rdx), %xmm8, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xbd,0x08,0xab,0x92,0x00,0x04,0x00,0x00]
+          vfmsub213sd   1024(%rdx), %xmm8, %xmm26 
+
+// CHECK: vfmsub213sd   -1024(%rdx), %xmm8, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xbd,0x08,0xab,0x52,0x80]
+          vfmsub213sd   -1024(%rdx), %xmm8, %xmm26 
+
+// CHECK: vfmsub213sd   -1032(%rdx), %xmm8, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xbd,0x08,0xab,0x92,0xf8,0xfb,0xff,0xff]
+          vfmsub213sd   -1032(%rdx), %xmm8, %xmm26 
+
+// CHECK: vfmsub231ss %xmm5, %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x08,0xbb,0xed]
+          vfmsub231ss %xmm5, %xmm3, %xmm29
+
+// CHECK: vfmsub231ss %xmm5, %xmm3, %xmm29 {%k6}
+// CHECK:  encoding: [0x62,0x62,0x65,0x0e,0xbb,0xed]
+          vfmsub231ss %xmm5, %xmm3, %xmm29 {%k6}
+
+// CHECK: vfmsub231ss %xmm5, %xmm3, %xmm29 {%k6} {z}
+// CHECK:  encoding: [0x62,0x62,0x65,0x8e,0xbb,0xed]
+          vfmsub231ss %xmm5, %xmm3, %xmm29 {%k6} {z}
+
+// CHECK: vfmsub231ss {rn-sae}, %xmm5, %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x18,0xbb,0xed]
+          vfmsub231ss {rn-sae}, %xmm5, %xmm3, %xmm29
+
+// CHECK: vfmsub231ss {ru-sae}, %xmm5, %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x58,0xbb,0xed]
+          vfmsub231ss {ru-sae}, %xmm5, %xmm3, %xmm29
+
+// CHECK: vfmsub231ss {rd-sae}, %xmm5, %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x38,0xbb,0xed]
+          vfmsub231ss {rd-sae}, %xmm5, %xmm3, %xmm29
+
+// CHECK: vfmsub231ss {rz-sae}, %xmm5, %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x78,0xbb,0xed]
+          vfmsub231ss {rz-sae}, %xmm5, %xmm3, %xmm29
+
+// CHECK: vfmsub231ss (%rcx), %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x08,0xbb,0x29]
+          vfmsub231ss (%rcx), %xmm3, %xmm29
+
+// CHECK: vfmsub231ss 291(%rax,%r14,8), %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x65,0x08,0xbb,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231ss 291(%rax,%r14,8), %xmm3, %xmm29
+
+// CHECK: vfmsub231ss 508(%rdx), %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x08,0xbb,0x6a,0x7f]
+          vfmsub231ss 508(%rdx), %xmm3, %xmm29
+
+// CHECK: vfmsub231ss 512(%rdx), %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x08,0xbb,0xaa,0x00,0x02,0x00,0x00]
+          vfmsub231ss 512(%rdx), %xmm3, %xmm29
+
+// CHECK: vfmsub231ss -512(%rdx), %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x08,0xbb,0x6a,0x80]
+          vfmsub231ss -512(%rdx), %xmm3, %xmm29
+
+// CHECK: vfmsub231ss -516(%rdx), %xmm3, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x65,0x08,0xbb,0xaa,0xfc,0xfd,0xff,0xff]
+          vfmsub231ss -516(%rdx), %xmm3, %xmm29
+
+// CHECK: vfmsub231sd %xmm6, %xmm11, %xmm3
+// CHECK:  encoding: [0xc4,0xe2,0xa1,0xbb,0xde]
+          vfmsub231sd %xmm6, %xmm11, %xmm3
+
+// CHECK: vfmsub231sd %xmm6, %xmm11, %xmm3 {%k7}
+// CHECK:  encoding: [0x62,0xf2,0xa5,0x0f,0xbb,0xde]
+          vfmsub231sd %xmm6, %xmm11, %xmm3 {%k7}
+
+// CHECK: vfmsub231sd %xmm6, %xmm11, %xmm3 {%k7} {z}
+// CHECK:  encoding: [0x62,0xf2,0xa5,0x8f,0xbb,0xde]
+          vfmsub231sd %xmm6, %xmm11, %xmm3 {%k7} {z}
+
+// CHECK: vfmsub231sd {rn-sae}, %xmm6, %xmm11, %xmm3
+// CHECK:  encoding: [0x62,0xf2,0xa5,0x18,0xbb,0xde]
+          vfmsub231sd {rn-sae}, %xmm6, %xmm11, %xmm3
+
+// CHECK: vfmsub231sd {ru-sae}, %xmm6, %xmm11, %xmm3
+// CHECK:  encoding: [0x62,0xf2,0xa5,0x58,0xbb,0xde]
+          vfmsub231sd {ru-sae}, %xmm6, %xmm11, %xmm3
+
+// CHECK: vfmsub231sd {rd-sae}, %xmm6, %xmm11, %xmm3
+// CHECK:  encoding: [0x62,0xf2,0xa5,0x38,0xbb,0xde]
+          vfmsub231sd {rd-sae}, %xmm6, %xmm11, %xmm3
+
+// CHECK: vfmsub231sd {rz-sae}, %xmm6, %xmm11, %xmm3
+// CHECK:  encoding: [0x62,0xf2,0xa5,0x78,0xbb,0xde]
+          vfmsub231sd {rz-sae}, %xmm6, %xmm11, %xmm3
+
+// CHECK: vfmsub231sd   (%rcx), %xmm11, %xmm23 
+// CHECK: encoding: [0x62,0xe2,0xa5,0x08,0xbb,0x39]
+          vfmsub231sd   (%rcx), %xmm11, %xmm23 
+
+// CHECK: vfmsub231sd   291(%rax,%r14,8), %xmm11, %xmm23 
+// CHECK: encoding: [0x62,0xa2,0xa5,0x08,0xbb,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231sd   291(%rax,%r14,8), %xmm11, %xmm23 
+
+// CHECK: vfmsub231sd   1016(%rdx), %xmm11, %xmm23 
+// CHECK: encoding: [0x62,0xe2,0xa5,0x08,0xbb,0x7a,0x7f]
+          vfmsub231sd   1016(%rdx), %xmm11, %xmm23 
+
+// CHECK: vfmsub231sd   1024(%rdx), %xmm11, %xmm23 
+// CHECK: encoding: [0x62,0xe2,0xa5,0x08,0xbb,0xba,0x00,0x04,0x00,0x00]
+          vfmsub231sd   1024(%rdx), %xmm11, %xmm23 
+
+// CHECK: vfmsub231sd   -1024(%rdx), %xmm11, %xmm23 
+// CHECK: encoding: [0x62,0xe2,0xa5,0x08,0xbb,0x7a,0x80]
+          vfmsub231sd   -1024(%rdx), %xmm11, %xmm23 
+
+// CHECK: vfmsub231sd   -1032(%rdx), %xmm11, %xmm23 
+// CHECK: encoding: [0x62,0xe2,0xa5,0x08,0xbb,0xba,0xf8,0xfb,0xff,0xff]
+          vfmsub231sd   -1032(%rdx), %xmm11, %xmm23 
+
+// CHECK: vfnmadd132ss %xmm23, %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x75,0x08,0x9d,0xcf]
+          vfnmadd132ss %xmm23, %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss %xmm23, %xmm1, %xmm25 {%k3}
+// CHECK:  encoding: [0x62,0x22,0x75,0x0b,0x9d,0xcf]
+          vfnmadd132ss %xmm23, %xmm1, %xmm25 {%k3}
+
+// CHECK: vfnmadd132ss %xmm23, %xmm1, %xmm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0x75,0x8b,0x9d,0xcf]
+          vfnmadd132ss %xmm23, %xmm1, %xmm25 {%k3} {z}
+
+// CHECK: vfnmadd132ss {rn-sae}, %xmm23, %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x75,0x18,0x9d,0xcf]
+          vfnmadd132ss {rn-sae}, %xmm23, %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss {ru-sae}, %xmm23, %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x75,0x58,0x9d,0xcf]
+          vfnmadd132ss {ru-sae}, %xmm23, %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss {rd-sae}, %xmm23, %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x75,0x38,0x9d,0xcf]
+          vfnmadd132ss {rd-sae}, %xmm23, %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss {rz-sae}, %xmm23, %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x75,0x78,0x9d,0xcf]
+          vfnmadd132ss {rz-sae}, %xmm23, %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss (%rcx), %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x75,0x08,0x9d,0x09]
+          vfnmadd132ss (%rcx), %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss 291(%rax,%r14,8), %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x75,0x08,0x9d,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132ss 291(%rax,%r14,8), %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss 508(%rdx), %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x75,0x08,0x9d,0x4a,0x7f]
+          vfnmadd132ss 508(%rdx), %xmm1, %xmm25
+
+// CHECK: vfnmadd132ss 512(%rdx), %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x75,0x08,0x9d,0x8a,0x00,0x02,0x00,0x00]
+          vfnmadd132ss 512(%rdx), %xmm1, %xmm25
 
-// CHECK: vpermi2d %zmm4, %zmm28, %zmm10 {%k5} {z}
-// CHECK:  encoding: [0x62,0x72,0x1d,0xc5,0x76,0xd4]
-          vpermi2d %zmm4, %zmm28, %zmm10 {%k5} {z}
+// CHECK: vfnmadd132ss -512(%rdx), %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x75,0x08,0x9d,0x4a,0x80]
+          vfnmadd132ss -512(%rdx), %xmm1, %xmm25
 
-// CHECK: vpermi2d (%rcx), %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x11]
-          vpermi2d (%rcx), %zmm28, %zmm10
+// CHECK: vfnmadd132ss -516(%rdx), %xmm1, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x75,0x08,0x9d,0x8a,0xfc,0xfd,0xff,0xff]
+          vfnmadd132ss -516(%rdx), %xmm1, %xmm25
 
-// CHECK: vpermi2d 291(%rax,%r14,8), %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x32,0x1d,0x40,0x76,0x94,0xf0,0x23,0x01,0x00,0x00]
-          vpermi2d 291(%rax,%r14,8), %zmm28, %zmm10
+// CHECK: vfnmadd132sd %xmm11, %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xc2,0xe5,0x08,0x9d,0xdb]
+          vfnmadd132sd %xmm11, %xmm3, %xmm19
 
-// CHECK: vpermi2d (%rcx){1to16}, %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x11]
-          vpermi2d (%rcx){1to16}, %zmm28, %zmm10
+// CHECK: vfnmadd132sd %xmm11, %xmm3, %xmm19 {%k2}
+// CHECK:  encoding: [0x62,0xc2,0xe5,0x0a,0x9d,0xdb]
+          vfnmadd132sd %xmm11, %xmm3, %xmm19 {%k2}
 
-// CHECK: vpermi2d 8128(%rdx), %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x52,0x7f]
-          vpermi2d 8128(%rdx), %zmm28, %zmm10
+// CHECK: vfnmadd132sd %xmm11, %xmm3, %xmm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0xc2,0xe5,0x8a,0x9d,0xdb]
+          vfnmadd132sd %xmm11, %xmm3, %xmm19 {%k2} {z}
 
-// CHECK: vpermi2d 8192(%rdx), %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x92,0x00,0x20,0x00,0x00]
-          vpermi2d 8192(%rdx), %zmm28, %zmm10
+// CHECK: vfnmadd132sd {rn-sae}, %xmm11, %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xc2,0xe5,0x18,0x9d,0xdb]
+          vfnmadd132sd {rn-sae}, %xmm11, %xmm3, %xmm19
 
-// CHECK: vpermi2d -8192(%rdx), %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x52,0x80]
-          vpermi2d -8192(%rdx), %zmm28, %zmm10
+// CHECK: vfnmadd132sd {ru-sae}, %xmm11, %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xc2,0xe5,0x58,0x9d,0xdb]
+          vfnmadd132sd {ru-sae}, %xmm11, %xmm3, %xmm19
 
-// CHECK: vpermi2d -8256(%rdx), %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x92,0xc0,0xdf,0xff,0xff]
-          vpermi2d -8256(%rdx), %zmm28, %zmm10
+// CHECK: vfnmadd132sd {rd-sae}, %xmm11, %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xc2,0xe5,0x38,0x9d,0xdb]
+          vfnmadd132sd {rd-sae}, %xmm11, %xmm3, %xmm19
 
-// CHECK: vpermi2d 508(%rdx){1to16}, %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x52,0x7f]
-          vpermi2d 508(%rdx){1to16}, %zmm28, %zmm10
+// CHECK: vfnmadd132sd {rz-sae}, %xmm11, %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xc2,0xe5,0x78,0x9d,0xdb]
+          vfnmadd132sd {rz-sae}, %xmm11, %xmm3, %xmm19
 
-// CHECK: vpermi2d 512(%rdx){1to16}, %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x92,0x00,0x02,0x00,0x00]
-          vpermi2d 512(%rdx){1to16}, %zmm28, %zmm10
+// CHECK: vfnmadd132sd (%rcx), %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x08,0x9d,0x19]
+          vfnmadd132sd (%rcx), %xmm3, %xmm19
 
-// CHECK: vpermi2d -512(%rdx){1to16}, %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x52,0x80]
-          vpermi2d -512(%rdx){1to16}, %zmm28, %zmm10
+// CHECK: vfnmadd132sd 291(%rax,%r14,8), %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x08,0x9d,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132sd 291(%rax,%r14,8), %xmm3, %xmm19
 
-// CHECK: vpermi2d -516(%rdx){1to16}, %zmm28, %zmm10
-// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x92,0xfc,0xfd,0xff,0xff]
-          vpermi2d -516(%rdx){1to16}, %zmm28, %zmm10
+// CHECK: vfnmadd132sd 1016(%rdx), %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x08,0x9d,0x5a,0x7f]
+          vfnmadd132sd 1016(%rdx), %xmm3, %xmm19
 
-// CHECK: vpermi2q %zmm28, %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0x82,0x9d,0x40,0x76,0xd4]
-          vpermi2q %zmm28, %zmm28, %zmm18
+// CHECK: vfnmadd132sd 1024(%rdx), %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x08,0x9d,0x9a,0x00,0x04,0x00,0x00]
+          vfnmadd132sd 1024(%rdx), %xmm3, %xmm19
 
-// CHECK: vpermi2q %zmm28, %zmm28, %zmm18 {%k2}
-// CHECK:  encoding: [0x62,0x82,0x9d,0x42,0x76,0xd4]
-          vpermi2q %zmm28, %zmm28, %zmm18 {%k2}
+// CHECK: vfnmadd132sd -1024(%rdx), %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x08,0x9d,0x5a,0x80]
+          vfnmadd132sd -1024(%rdx), %xmm3, %xmm19
 
-// CHECK: vpermi2q %zmm28, %zmm28, %zmm18 {%k2} {z}
-// CHECK:  encoding: [0x62,0x82,0x9d,0xc2,0x76,0xd4]
-          vpermi2q %zmm28, %zmm28, %zmm18 {%k2} {z}
+// CHECK: vfnmadd132sd -1032(%rdx), %xmm3, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x08,0x9d,0x9a,0xf8,0xfb,0xff,0xff]
+          vfnmadd132sd -1032(%rdx), %xmm3, %xmm19
 
-// CHECK: vpermi2q (%rcx), %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x11]
-          vpermi2q (%rcx), %zmm28, %zmm18
+// CHECK: vfnmadd213ss %xmm28, %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x02,0x45,0x08,0xad,0xec]
+          vfnmadd213ss %xmm28, %xmm7, %xmm29
 
-// CHECK: vpermi2q 291(%rax,%r14,8), %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xa2,0x9d,0x40,0x76,0x94,0xf0,0x23,0x01,0x00,0x00]
-          vpermi2q 291(%rax,%r14,8), %zmm28, %zmm18
+// CHECK: vfnmadd213ss %xmm28, %xmm7, %xmm29 {%k2}
+// CHECK:  encoding: [0x62,0x02,0x45,0x0a,0xad,0xec]
+          vfnmadd213ss %xmm28, %xmm7, %xmm29 {%k2}
 
-// CHECK: vpermi2q (%rcx){1to8}, %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x11]
-          vpermi2q (%rcx){1to8}, %zmm28, %zmm18
+// CHECK: vfnmadd213ss %xmm28, %xmm7, %xmm29 {%k2} {z}
+// CHECK:  encoding: [0x62,0x02,0x45,0x8a,0xad,0xec]
+          vfnmadd213ss %xmm28, %xmm7, %xmm29 {%k2} {z}
 
-// CHECK: vpermi2q 8128(%rdx), %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x52,0x7f]
-          vpermi2q 8128(%rdx), %zmm28, %zmm18
+// CHECK: vfnmadd213ss {rn-sae}, %xmm28, %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x02,0x45,0x18,0xad,0xec]
+          vfnmadd213ss {rn-sae}, %xmm28, %xmm7, %xmm29
 
-// CHECK: vpermi2q 8192(%rdx), %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x92,0x00,0x20,0x00,0x00]
-          vpermi2q 8192(%rdx), %zmm28, %zmm18
+// CHECK: vfnmadd213ss {ru-sae}, %xmm28, %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x02,0x45,0x58,0xad,0xec]
+          vfnmadd213ss {ru-sae}, %xmm28, %xmm7, %xmm29
 
-// CHECK: vpermi2q -8192(%rdx), %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x52,0x80]
-          vpermi2q -8192(%rdx), %zmm28, %zmm18
+// CHECK: vfnmadd213ss {rd-sae}, %xmm28, %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x02,0x45,0x38,0xad,0xec]
+          vfnmadd213ss {rd-sae}, %xmm28, %xmm7, %xmm29
 
-// CHECK: vpermi2q -8256(%rdx), %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x92,0xc0,0xdf,0xff,0xff]
-          vpermi2q -8256(%rdx), %zmm28, %zmm18
+// CHECK: vfnmadd213ss {rz-sae}, %xmm28, %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x02,0x45,0x78,0xad,0xec]
+          vfnmadd213ss {rz-sae}, %xmm28, %xmm7, %xmm29
 
-// CHECK: vpermi2q 1016(%rdx){1to8}, %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x52,0x7f]
-          vpermi2q 1016(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: vfnmadd213ss (%rcx), %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x45,0x08,0xad,0x29]
+          vfnmadd213ss (%rcx), %xmm7, %xmm29
 
-// CHECK: vpermi2q 1024(%rdx){1to8}, %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x92,0x00,0x04,0x00,0x00]
-          vpermi2q 1024(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: vfnmadd213ss 291(%rax,%r14,8), %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x45,0x08,0xad,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213ss 291(%rax,%r14,8), %xmm7, %xmm29
 
-// CHECK: vpermi2q -1024(%rdx){1to8}, %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x52,0x80]
-          vpermi2q -1024(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: vfnmadd213ss 508(%rdx), %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x45,0x08,0xad,0x6a,0x7f]
+          vfnmadd213ss 508(%rdx), %xmm7, %xmm29
 
-// CHECK: vpermi2q -1032(%rdx){1to8}, %zmm28, %zmm18
-// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x92,0xf8,0xfb,0xff,0xff]
-          vpermi2q -1032(%rdx){1to8}, %zmm28, %zmm18
+// CHECK: vfnmadd213ss 512(%rdx), %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x45,0x08,0xad,0xaa,0x00,0x02,0x00,0x00]
+          vfnmadd213ss 512(%rdx), %xmm7, %xmm29
 
-// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x42,0x45,0x40,0x77,0xc0]
-          vpermi2ps %zmm8, %zmm23, %zmm24
+// CHECK: vfnmadd213ss -512(%rdx), %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x45,0x08,0xad,0x6a,0x80]
+          vfnmadd213ss -512(%rdx), %xmm7, %xmm29
 
-// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24 {%k2}
-// CHECK:  encoding: [0x62,0x42,0x45,0x42,0x77,0xc0]
-          vpermi2ps %zmm8, %zmm23, %zmm24 {%k2}
+// CHECK: vfnmadd213ss -516(%rdx), %xmm7, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x45,0x08,0xad,0xaa,0xfc,0xfd,0xff,0xff]
+          vfnmadd213ss -516(%rdx), %xmm7, %xmm29
 
-// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} {z}
-// CHECK:  encoding: [0x62,0x42,0x45,0xc2,0x77,0xc0]
-          vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} {z}
+// CHECK: vfnmadd213sd %xmm2, %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x00,0xad,0xea]
+          vfnmadd213sd %xmm2, %xmm27, %xmm13
 
-// CHECK: vpermi2ps (%rcx), %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x01]
-          vpermi2ps (%rcx), %zmm23, %zmm24
+// CHECK: vfnmadd213sd %xmm2, %xmm27, %xmm13 {%k7}
+// CHECK:  encoding: [0x62,0x72,0xa5,0x07,0xad,0xea]
+          vfnmadd213sd %xmm2, %xmm27, %xmm13 {%k7}
 
-// CHECK: vpermi2ps 291(%rax,%r14,8), %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x22,0x45,0x40,0x77,0x84,0xf0,0x23,0x01,0x00,0x00]
-          vpermi2ps 291(%rax,%r14,8), %zmm23, %zmm24
+// CHECK: vfnmadd213sd %xmm2, %xmm27, %xmm13 {%k7} {z}
+// CHECK:  encoding: [0x62,0x72,0xa5,0x87,0xad,0xea]
+          vfnmadd213sd %xmm2, %xmm27, %xmm13 {%k7} {z}
 
-// CHECK: vpermi2ps (%rcx){1to16}, %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x01]
-          vpermi2ps (%rcx){1to16}, %zmm23, %zmm24
+// CHECK: vfnmadd213sd {rn-sae}, %xmm2, %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x10,0xad,0xea]
+          vfnmadd213sd {rn-sae}, %xmm2, %xmm27, %xmm13
 
-// CHECK: vpermi2ps 8128(%rdx), %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x42,0x7f]
-          vpermi2ps 8128(%rdx), %zmm23, %zmm24
+// CHECK: vfnmadd213sd {ru-sae}, %xmm2, %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x50,0xad,0xea]
+          vfnmadd213sd {ru-sae}, %xmm2, %xmm27, %xmm13
 
-// CHECK: vpermi2ps 8192(%rdx), %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x82,0x00,0x20,0x00,0x00]
-          vpermi2ps 8192(%rdx), %zmm23, %zmm24
+// CHECK: vfnmadd213sd {rd-sae}, %xmm2, %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x30,0xad,0xea]
+          vfnmadd213sd {rd-sae}, %xmm2, %xmm27, %xmm13
 
-// CHECK: vpermi2ps -8192(%rdx), %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x42,0x80]
-          vpermi2ps -8192(%rdx), %zmm23, %zmm24
+// CHECK: vfnmadd213sd {rz-sae}, %xmm2, %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x70,0xad,0xea]
+          vfnmadd213sd {rz-sae}, %xmm2, %xmm27, %xmm13
 
-// CHECK: vpermi2ps -8256(%rdx), %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x82,0xc0,0xdf,0xff,0xff]
-          vpermi2ps -8256(%rdx), %zmm23, %zmm24
+// CHECK: vfnmadd213sd (%rcx), %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x00,0xad,0x29]
+          vfnmadd213sd (%rcx), %xmm27, %xmm13
 
-// CHECK: vpermi2ps 508(%rdx){1to16}, %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x42,0x7f]
-          vpermi2ps 508(%rdx){1to16}, %zmm23, %zmm24
+// CHECK: vfnmadd213sd 291(%rax,%r14,8), %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x32,0xa5,0x00,0xad,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213sd 291(%rax,%r14,8), %xmm27, %xmm13
 
-// CHECK: vpermi2ps 512(%rdx){1to16}, %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x82,0x00,0x02,0x00,0x00]
-          vpermi2ps 512(%rdx){1to16}, %zmm23, %zmm24
+// CHECK: vfnmadd213sd 1016(%rdx), %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x00,0xad,0x6a,0x7f]
+          vfnmadd213sd 1016(%rdx), %xmm27, %xmm13
 
-// CHECK: vpermi2ps -512(%rdx){1to16}, %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x42,0x80]
-          vpermi2ps -512(%rdx){1to16}, %zmm23, %zmm24
+// CHECK: vfnmadd213sd 1024(%rdx), %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x00,0xad,0xaa,0x00,0x04,0x00,0x00]
+          vfnmadd213sd 1024(%rdx), %xmm27, %xmm13
 
-// CHECK: vpermi2ps -516(%rdx){1to16}, %zmm23, %zmm24
-// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x82,0xfc,0xfd,0xff,0xff]
-          vpermi2ps -516(%rdx){1to16}, %zmm23, %zmm24
+// CHECK: vfnmadd213sd -1024(%rdx), %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x00,0xad,0x6a,0x80]
+          vfnmadd213sd -1024(%rdx), %xmm27, %xmm13
 
-// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xa2,0xd5,0x48,0x77,0xe4]
-          vpermi2pd %zmm20, %zmm5, %zmm20
+// CHECK: vfnmadd213sd -1032(%rdx), %xmm27, %xmm13
+// CHECK:  encoding: [0x62,0x72,0xa5,0x00,0xad,0xaa,0xf8,0xfb,0xff,0xff]
+          vfnmadd213sd -1032(%rdx), %xmm27, %xmm13
 
-// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20 {%k3}
-// CHECK:  encoding: [0x62,0xa2,0xd5,0x4b,0x77,0xe4]
-          vpermi2pd %zmm20, %zmm5, %zmm20 {%k3}
+// CHECK: vfnmadd231ss %xmm17, %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x22,0x6d,0x08,0xbd,0xd9]
+          vfnmadd231ss %xmm17, %xmm2, %xmm27
 
-// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} {z}
-// CHECK:  encoding: [0x62,0xa2,0xd5,0xcb,0x77,0xe4]
-          vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} {z}
+// CHECK: vfnmadd231ss %xmm17, %xmm2, %xmm27 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x6d,0x09,0xbd,0xd9]
+          vfnmadd231ss %xmm17, %xmm2, %xmm27 {%k1}
 
-// CHECK: vpermi2pd (%rcx), %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x21]
-          vpermi2pd (%rcx), %zmm5, %zmm20
+// CHECK: vfnmadd231ss %xmm17, %xmm2, %xmm27 {%k1} {z}
+// CHECK:  encoding: [0x62,0x22,0x6d,0x89,0xbd,0xd9]
+          vfnmadd231ss %xmm17, %xmm2, %xmm27 {%k1} {z}
 
-// CHECK: vpermi2pd 291(%rax,%r14,8), %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xa2,0xd5,0x48,0x77,0xa4,0xf0,0x23,0x01,0x00,0x00]
-          vpermi2pd 291(%rax,%r14,8), %zmm5, %zmm20
+// CHECK: vfnmadd231ss {rn-sae}, %xmm17, %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x22,0x6d,0x18,0xbd,0xd9]
+          vfnmadd231ss {rn-sae}, %xmm17, %xmm2, %xmm27
 
-// CHECK: vpermi2pd (%rcx){1to8}, %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x21]
-          vpermi2pd (%rcx){1to8}, %zmm5, %zmm20
+// CHECK: vfnmadd231ss {ru-sae}, %xmm17, %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x22,0x6d,0x58,0xbd,0xd9]
+          vfnmadd231ss {ru-sae}, %xmm17, %xmm2, %xmm27
 
-// CHECK: vpermi2pd 8128(%rdx), %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x62,0x7f]
-          vpermi2pd 8128(%rdx), %zmm5, %zmm20
+// CHECK: vfnmadd231ss {rd-sae}, %xmm17, %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x22,0x6d,0x38,0xbd,0xd9]
+          vfnmadd231ss {rd-sae}, %xmm17, %xmm2, %xmm27
 
-// CHECK: vpermi2pd 8192(%rdx), %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0xa2,0x00,0x20,0x00,0x00]
-          vpermi2pd 8192(%rdx), %zmm5, %zmm20
+// CHECK: vfnmadd231ss {rz-sae}, %xmm17, %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x22,0x6d,0x78,0xbd,0xd9]
+          vfnmadd231ss {rz-sae}, %xmm17, %xmm2, %xmm27
 
-// CHECK: vpermi2pd -8192(%rdx), %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x62,0x80]
-          vpermi2pd -8192(%rdx), %zmm5, %zmm20
+// CHECK: vfnmadd231ss (%rcx), %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x6d,0x08,0xbd,0x19]
+          vfnmadd231ss (%rcx), %xmm2, %xmm27
 
-// CHECK: vpermi2pd -8256(%rdx), %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0xa2,0xc0,0xdf,0xff,0xff]
-          vpermi2pd -8256(%rdx), %zmm5, %zmm20
+// CHECK: vfnmadd231ss 291(%rax,%r14,8), %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x22,0x6d,0x08,0xbd,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231ss 291(%rax,%r14,8), %xmm2, %xmm27
 
-// CHECK: vpermi2pd 1016(%rdx){1to8}, %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x62,0x7f]
-          vpermi2pd 1016(%rdx){1to8}, %zmm5, %zmm20
+// CHECK: vfnmadd231ss 508(%rdx), %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x6d,0x08,0xbd,0x5a,0x7f]
+          vfnmadd231ss 508(%rdx), %xmm2, %xmm27
 
-// CHECK: vpermi2pd 1024(%rdx){1to8}, %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0x00,0x04,0x00,0x00]
-          vpermi2pd 1024(%rdx){1to8}, %zmm5, %zmm20
+// CHECK: vfnmadd231ss 512(%rdx), %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x6d,0x08,0xbd,0x9a,0x00,0x02,0x00,0x00]
+          vfnmadd231ss 512(%rdx), %xmm2, %xmm27
 
-// CHECK: vpermi2pd -1024(%rdx){1to8}, %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x62,0x80]
-          vpermi2pd -1024(%rdx){1to8}, %zmm5, %zmm20
+// CHECK: vfnmadd231ss -512(%rdx), %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x6d,0x08,0xbd,0x5a,0x80]
+          vfnmadd231ss -512(%rdx), %xmm2, %xmm27
 
-// CHECK: vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20
-// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0xf8,0xfb,0xff,0xff]
-          vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20
+// CHECK: vfnmadd231ss -516(%rdx), %xmm2, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x6d,0x08,0xbd,0x9a,0xfc,0xfd,0xff,0xff]
+          vfnmadd231ss -516(%rdx), %xmm2, %xmm27
+
+// CHECK: vfnmadd231sd %xmm18, %xmm11, %xmm6
+// CHECK:  encoding: [0x62,0xb2,0xa5,0x08,0xbd,0xf2]
+          vfnmadd231sd %xmm18, %xmm11, %xmm6
+
+// CHECK: vfnmadd231sd %xmm18, %xmm11, %xmm6 {%k3}
+// CHECK:  encoding: [0x62,0xb2,0xa5,0x0b,0xbd,0xf2]
+          vfnmadd231sd %xmm18, %xmm11, %xmm6 {%k3}
+
+// CHECK: vfnmadd231sd %xmm18, %xmm11, %xmm6 {%k3} {z}
+// CHECK:  encoding: [0x62,0xb2,0xa5,0x8b,0xbd,0xf2]
+          vfnmadd231sd %xmm18, %xmm11, %xmm6 {%k3} {z}
+
+// CHECK: vfnmadd231sd {rn-sae}, %xmm18, %xmm11, %xmm6
+// CHECK:  encoding: [0x62,0xb2,0xa5,0x18,0xbd,0xf2]
+          vfnmadd231sd {rn-sae}, %xmm18, %xmm11, %xmm6
+
+// CHECK: vfnmadd231sd {ru-sae}, %xmm18, %xmm11, %xmm6
+// CHECK:  encoding: [0x62,0xb2,0xa5,0x58,0xbd,0xf2]
+          vfnmadd231sd {ru-sae}, %xmm18, %xmm11, %xmm6
+
+// CHECK: vfnmadd231sd {rd-sae}, %xmm18, %xmm11, %xmm6
+// CHECK:  encoding: [0x62,0xb2,0xa5,0x38,0xbd,0xf2]
+          vfnmadd231sd {rd-sae}, %xmm18, %xmm11, %xmm6
+
+// CHECK: vfnmadd231sd {rz-sae}, %xmm18, %xmm11, %xmm6
+// CHECK:  encoding: [0x62,0xb2,0xa5,0x78,0xbd,0xf2]
+          vfnmadd231sd {rz-sae}, %xmm18, %xmm11, %xmm6
+
+// CHECK: vfnmadd231sd  (%rcx), %xmm11, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xa5,0x08,0xbd,0x11]
+          vfnmadd231sd  (%rcx), %xmm11, %xmm26 
+
+// CHECK: vfnmadd231sd  291(%rax,%r14,8), %xmm11, %xmm26 
+// CHECK: encoding: [0x62,0x22,0xa5,0x08,0xbd,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231sd  291(%rax,%r14,8), %xmm11, %xmm26 
+
+// CHECK: vfnmadd231sd  1016(%rdx), %xmm11, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xa5,0x08,0xbd,0x52,0x7f]
+          vfnmadd231sd  1016(%rdx), %xmm11, %xmm26 
+
+// CHECK: vfnmadd231sd  1024(%rdx), %xmm11, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xa5,0x08,0xbd,0x92,0x00,0x04,0x00,0x00]
+          vfnmadd231sd  1024(%rdx), %xmm11, %xmm26 
+
+// CHECK: vfnmadd231sd  -1024(%rdx), %xmm11, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xa5,0x08,0xbd,0x52,0x80]
+          vfnmadd231sd  -1024(%rdx), %xmm11, %xmm26 
+
+// CHECK: vfnmadd231sd  -1032(%rdx), %xmm11, %xmm26 
+// CHECK: encoding: [0x62,0x62,0xa5,0x08,0xbd,0x92,0xf8,0xfb,0xff,0xff]
+          vfnmadd231sd  -1032(%rdx), %xmm11, %xmm26 
+
+// CHECK: vfnmsub132ss %xmm24, %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0x82,0x4d,0x00,0x9f,0xf8]
+          vfnmsub132ss %xmm24, %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss %xmm24, %xmm22, %xmm23 {%k6}
+// CHECK:  encoding: [0x62,0x82,0x4d,0x06,0x9f,0xf8]
+          vfnmsub132ss %xmm24, %xmm22, %xmm23 {%k6}
+
+// CHECK: vfnmsub132ss %xmm24, %xmm22, %xmm23 {%k6} {z}
+// CHECK:  encoding: [0x62,0x82,0x4d,0x86,0x9f,0xf8]
+          vfnmsub132ss %xmm24, %xmm22, %xmm23 {%k6} {z}
+
+// CHECK: vfnmsub132ss {rn-sae}, %xmm24, %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0x82,0x4d,0x10,0x9f,0xf8]
+          vfnmsub132ss {rn-sae}, %xmm24, %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss {ru-sae}, %xmm24, %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0x82,0x4d,0x50,0x9f,0xf8]
+          vfnmsub132ss {ru-sae}, %xmm24, %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss {rd-sae}, %xmm24, %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0x82,0x4d,0x30,0x9f,0xf8]
+          vfnmsub132ss {rd-sae}, %xmm24, %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss {rz-sae}, %xmm24, %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0x82,0x4d,0x70,0x9f,0xf8]
+          vfnmsub132ss {rz-sae}, %xmm24, %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss (%rcx), %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x9f,0x39]
+          vfnmsub132ss (%rcx), %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss 291(%rax,%r14,8), %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x00,0x9f,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132ss 291(%rax,%r14,8), %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss 508(%rdx), %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x9f,0x7a,0x7f]
+          vfnmsub132ss 508(%rdx), %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss 512(%rdx), %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x9f,0xba,0x00,0x02,0x00,0x00]
+          vfnmsub132ss 512(%rdx), %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss -512(%rdx), %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x9f,0x7a,0x80]
+          vfnmsub132ss -512(%rdx), %xmm22, %xmm23
+
+// CHECK: vfnmsub132ss -516(%rdx), %xmm22, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x9f,0xba,0xfc,0xfd,0xff,0xff]
+          vfnmsub132ss -516(%rdx), %xmm22, %xmm23
+
+// CHECK: vfnmsub132sd %xmm13, %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x42,0xa5,0x08,0x9f,0xd5]
+          vfnmsub132sd %xmm13, %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd %xmm13, %xmm11, %xmm26 {%k6}
+// CHECK:  encoding: [0x62,0x42,0xa5,0x0e,0x9f,0xd5]
+          vfnmsub132sd %xmm13, %xmm11, %xmm26 {%k6}
+
+// CHECK: vfnmsub132sd %xmm13, %xmm11, %xmm26 {%k6} {z}
+// CHECK:  encoding: [0x62,0x42,0xa5,0x8e,0x9f,0xd5]
+          vfnmsub132sd %xmm13, %xmm11, %xmm26 {%k6} {z}
+
+// CHECK: vfnmsub132sd {rn-sae}, %xmm13, %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x42,0xa5,0x18,0x9f,0xd5]
+          vfnmsub132sd {rn-sae}, %xmm13, %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd {ru-sae}, %xmm13, %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x42,0xa5,0x58,0x9f,0xd5]
+          vfnmsub132sd {ru-sae}, %xmm13, %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd {rd-sae}, %xmm13, %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x42,0xa5,0x38,0x9f,0xd5]
+          vfnmsub132sd {rd-sae}, %xmm13, %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd {rz-sae}, %xmm13, %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x42,0xa5,0x78,0x9f,0xd5]
+          vfnmsub132sd {rz-sae}, %xmm13, %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd (%rcx), %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x08,0x9f,0x11]
+          vfnmsub132sd (%rcx), %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd 291(%rax,%r14,8), %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xa5,0x08,0x9f,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132sd 291(%rax,%r14,8), %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd 1016(%rdx), %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x08,0x9f,0x52,0x7f]
+          vfnmsub132sd 1016(%rdx), %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd 1024(%rdx), %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x08,0x9f,0x92,0x00,0x04,0x00,0x00]
+          vfnmsub132sd 1024(%rdx), %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd -1024(%rdx), %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x08,0x9f,0x52,0x80]
+          vfnmsub132sd -1024(%rdx), %xmm11, %xmm26
+
+// CHECK: vfnmsub132sd -1032(%rdx), %xmm11, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x08,0x9f,0x92,0xf8,0xfb,0xff,0xff]
+          vfnmsub132sd -1032(%rdx), %xmm11, %xmm26
+
+// CHECK: vfnmsub213ss %xmm12, %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x52,0x1d,0x00,0xaf,0xf4]
+          vfnmsub213ss %xmm12, %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss %xmm12, %xmm28, %xmm14 {%k4}
+// CHECK:  encoding: [0x62,0x52,0x1d,0x04,0xaf,0xf4]
+          vfnmsub213ss %xmm12, %xmm28, %xmm14 {%k4}
+
+// CHECK: vfnmsub213ss %xmm12, %xmm28, %xmm14 {%k4} {z}
+// CHECK:  encoding: [0x62,0x52,0x1d,0x84,0xaf,0xf4]
+          vfnmsub213ss %xmm12, %xmm28, %xmm14 {%k4} {z}
+
+// CHECK: vfnmsub213ss {rn-sae}, %xmm12, %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x52,0x1d,0x10,0xaf,0xf4]
+          vfnmsub213ss {rn-sae}, %xmm12, %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss {ru-sae}, %xmm12, %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x52,0x1d,0x50,0xaf,0xf4]
+          vfnmsub213ss {ru-sae}, %xmm12, %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss {rd-sae}, %xmm12, %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x52,0x1d,0x30,0xaf,0xf4]
+          vfnmsub213ss {rd-sae}, %xmm12, %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss {rz-sae}, %xmm12, %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x52,0x1d,0x70,0xaf,0xf4]
+          vfnmsub213ss {rz-sae}, %xmm12, %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss (%rcx), %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x72,0x1d,0x00,0xaf,0x31]
+          vfnmsub213ss (%rcx), %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss 291(%rax,%r14,8), %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x32,0x1d,0x00,0xaf,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213ss 291(%rax,%r14,8), %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss 508(%rdx), %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x72,0x1d,0x00,0xaf,0x72,0x7f]
+          vfnmsub213ss 508(%rdx), %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss 512(%rdx), %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x72,0x1d,0x00,0xaf,0xb2,0x00,0x02,0x00,0x00]
+          vfnmsub213ss 512(%rdx), %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss -512(%rdx), %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x72,0x1d,0x00,0xaf,0x72,0x80]
+          vfnmsub213ss -512(%rdx), %xmm28, %xmm14
+
+// CHECK: vfnmsub213ss -516(%rdx), %xmm28, %xmm14
+// CHECK:  encoding: [0x62,0x72,0x1d,0x00,0xaf,0xb2,0xfc,0xfd,0xff,0xff]
+          vfnmsub213ss -516(%rdx), %xmm28, %xmm14
+
+// CHECK: vfnmsub213sd %xmm28, %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xc5,0x00,0xaf,0xec]
+          vfnmsub213sd %xmm28, %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd %xmm28, %xmm23, %xmm29 {%k3}
+// CHECK:  encoding: [0x62,0x02,0xc5,0x03,0xaf,0xec]
+          vfnmsub213sd %xmm28, %xmm23, %xmm29 {%k3}
+
+// CHECK: vfnmsub213sd %xmm28, %xmm23, %xmm29 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0xc5,0x83,0xaf,0xec]
+          vfnmsub213sd %xmm28, %xmm23, %xmm29 {%k3} {z}
+
+// CHECK: vfnmsub213sd {rn-sae}, %xmm28, %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xc5,0x10,0xaf,0xec]
+          vfnmsub213sd {rn-sae}, %xmm28, %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd {ru-sae}, %xmm28, %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xc5,0x50,0xaf,0xec]
+          vfnmsub213sd {ru-sae}, %xmm28, %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd {rd-sae}, %xmm28, %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xc5,0x30,0xaf,0xec]
+          vfnmsub213sd {rd-sae}, %xmm28, %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd {rz-sae}, %xmm28, %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xc5,0x70,0xaf,0xec]
+          vfnmsub213sd {rz-sae}, %xmm28, %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd (%rcx), %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xc5,0x00,0xaf,0x29]
+          vfnmsub213sd (%rcx), %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd 291(%rax,%r14,8), %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x22,0xc5,0x00,0xaf,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213sd 291(%rax,%r14,8), %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd 1016(%rdx), %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xc5,0x00,0xaf,0x6a,0x7f]
+          vfnmsub213sd 1016(%rdx), %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd 1024(%rdx), %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xc5,0x00,0xaf,0xaa,0x00,0x04,0x00,0x00]
+          vfnmsub213sd 1024(%rdx), %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd -1024(%rdx), %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xc5,0x00,0xaf,0x6a,0x80]
+          vfnmsub213sd -1024(%rdx), %xmm23, %xmm29
+
+// CHECK: vfnmsub213sd -1032(%rdx), %xmm23, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xc5,0x00,0xaf,0xaa,0xf8,0xfb,0xff,0xff]
+          vfnmsub213sd -1032(%rdx), %xmm23, %xmm29
+
+// CHECK: vfnmsub231ss %xmm10, %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x42,0x15,0x08,0xbf,0xd2]
+          vfnmsub231ss %xmm10, %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss %xmm10, %xmm13, %xmm26 {%k4}
+// CHECK:  encoding: [0x62,0x42,0x15,0x0c,0xbf,0xd2]
+          vfnmsub231ss %xmm10, %xmm13, %xmm26 {%k4}
+
+// CHECK: vfnmsub231ss %xmm10, %xmm13, %xmm26 {%k4} {z}
+// CHECK:  encoding: [0x62,0x42,0x15,0x8c,0xbf,0xd2]
+          vfnmsub231ss %xmm10, %xmm13, %xmm26 {%k4} {z}
+
+// CHECK: vfnmsub231ss {rn-sae}, %xmm10, %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x42,0x15,0x18,0xbf,0xd2]
+          vfnmsub231ss {rn-sae}, %xmm10, %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss {ru-sae}, %xmm10, %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x42,0x15,0x58,0xbf,0xd2]
+          vfnmsub231ss {ru-sae}, %xmm10, %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss {rd-sae}, %xmm10, %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x42,0x15,0x38,0xbf,0xd2]
+          vfnmsub231ss {rd-sae}, %xmm10, %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss {rz-sae}, %xmm10, %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x42,0x15,0x78,0xbf,0xd2]
+          vfnmsub231ss {rz-sae}, %xmm10, %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss (%rcx), %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xbf,0x11]
+          vfnmsub231ss (%rcx), %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss 291(%rax,%r14,8), %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x22,0x15,0x08,0xbf,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231ss 291(%rax,%r14,8), %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss 508(%rdx), %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xbf,0x52,0x7f]
+          vfnmsub231ss 508(%rdx), %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss 512(%rdx), %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xbf,0x92,0x00,0x02,0x00,0x00]
+          vfnmsub231ss 512(%rdx), %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss -512(%rdx), %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xbf,0x52,0x80]
+          vfnmsub231ss -512(%rdx), %xmm13, %xmm26
+
+// CHECK: vfnmsub231ss -516(%rdx), %xmm13, %xmm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x08,0xbf,0x92,0xfc,0xfd,0xff,0xff]
+          vfnmsub231ss -516(%rdx), %xmm13, %xmm26
+
+// CHECK: vfnmsub231sd %xmm14, %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xdd,0x00,0xbf,0xf6]
+          vfnmsub231sd %xmm14, %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd %xmm14, %xmm20, %xmm6 {%k1}
+// CHECK:  encoding: [0x62,0xd2,0xdd,0x01,0xbf,0xf6]
+          vfnmsub231sd %xmm14, %xmm20, %xmm6 {%k1}
+
+// CHECK: vfnmsub231sd %xmm14, %xmm20, %xmm6 {%k1} {z}
+// CHECK:  encoding: [0x62,0xd2,0xdd,0x81,0xbf,0xf6]
+          vfnmsub231sd %xmm14, %xmm20, %xmm6 {%k1} {z}
+
+// CHECK: vfnmsub231sd {rn-sae}, %xmm14, %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xdd,0x10,0xbf,0xf6]
+          vfnmsub231sd {rn-sae}, %xmm14, %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd {ru-sae}, %xmm14, %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xdd,0x50,0xbf,0xf6]
+          vfnmsub231sd {ru-sae}, %xmm14, %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd {rd-sae}, %xmm14, %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xdd,0x30,0xbf,0xf6]
+          vfnmsub231sd {rd-sae}, %xmm14, %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd {rz-sae}, %xmm14, %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xd2,0xdd,0x70,0xbf,0xf6]
+          vfnmsub231sd {rz-sae}, %xmm14, %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd (%rcx), %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0xbf,0x31]
+          vfnmsub231sd (%rcx), %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd 291(%rax,%r14,8), %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xb2,0xdd,0x00,0xbf,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231sd 291(%rax,%r14,8), %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd 1016(%rdx), %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0xbf,0x72,0x7f]
+          vfnmsub231sd 1016(%rdx), %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd 1024(%rdx), %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0xbf,0xb2,0x00,0x04,0x00,0x00]
+          vfnmsub231sd 1024(%rdx), %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd -1024(%rdx), %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0xbf,0x72,0x80]
+          vfnmsub231sd -1024(%rdx), %xmm20, %xmm6
+
+// CHECK: vfnmsub231sd -1032(%rdx), %xmm20, %xmm6
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0xbf,0xb2,0xf8,0xfb,0xff,0xff]
+          vfnmsub231sd -1032(%rdx), %xmm20, %xmm6
 
diff --git a/test/MC/X86/avx512vl-encoding.s b/test/MC/X86/avx512vl-encoding.s
index dd1ac24..e1fc328 100644
--- a/test/MC/X86/avx512vl-encoding.s
+++ b/test/MC/X86/avx512vl-encoding.s
@@ -891,3 +891,883 @@
 // CHECK: vpmovm2q %k2, %ymm30
 // CHECK:  encoding: [0x62,0x62,0xfe,0x28,0x38,0xf2]
           vpmovm2q %k2, %ymm30
+
+// CHECK: vcompresspd %xmm23, (%rcx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x39]
+          vcompresspd %xmm23, (%rcx)
+
+// CHECK: vcompresspd %xmm23, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x0e,0x8a,0x39]
+          vcompresspd %xmm23, (%rcx) {%k6}
+
+// CHECK: vcompresspd %xmm23, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x8a,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vcompresspd %xmm23, 291(%rax,%r14,8)
+
+// CHECK: vcompresspd %xmm23, 1016(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x7a,0x7f]
+          vcompresspd %xmm23, 1016(%rdx)
+
+// CHECK: vcompresspd %xmm23, 1024(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0xba,0x00,0x04,0x00,0x00]
+          vcompresspd %xmm23, 1024(%rdx)
+
+// CHECK: vcompresspd %xmm23, -1024(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x7a,0x80]
+          vcompresspd %xmm23, -1024(%rdx)
+
+// CHECK: vcompresspd %xmm23, -1032(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x8a,0xba,0xf8,0xfb,0xff,0xff]
+          vcompresspd %xmm23, -1032(%rdx)
+
+// CHECK: vcompresspd %ymm29, (%rcx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0x29]
+          vcompresspd %ymm29, (%rcx)
+
+// CHECK: vcompresspd %ymm29, (%rcx) {%k2}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x2a,0x8a,0x29]
+          vcompresspd %ymm29, (%rcx) {%k2}
+
+// CHECK: vcompresspd %ymm29, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x22,0xfd,0x28,0x8a,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcompresspd %ymm29, 291(%rax,%r14,8)
+
+// CHECK: vcompresspd %ymm29, 1016(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0x6a,0x7f]
+          vcompresspd %ymm29, 1016(%rdx)
+
+// CHECK: vcompresspd %ymm29, 1024(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0xaa,0x00,0x04,0x00,0x00]
+          vcompresspd %ymm29, 1024(%rdx)
+
+// CHECK: vcompresspd %ymm29, -1024(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0x6a,0x80]
+          vcompresspd %ymm29, -1024(%rdx)
+
+// CHECK: vcompresspd %ymm29, -1032(%rdx)
+// CHECK:  encoding: [0x62,0x62,0xfd,0x28,0x8a,0xaa,0xf8,0xfb,0xff,0xff]
+          vcompresspd %ymm29, -1032(%rdx)
+
+// CHECK: vcompresspd %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0x22,0xfd,0x08,0x8a,0xdc]
+          vcompresspd %xmm27, %xmm20
+
+// CHECK: vcompresspd %xmm27, %xmm20 {%k2}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x0a,0x8a,0xdc]
+          vcompresspd %xmm27, %xmm20 {%k2}
+
+// CHECK: vcompresspd %xmm27, %xmm20 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x8a,0x8a,0xdc]
+          vcompresspd %xmm27, %xmm20 {%k2} {z}
+
+// CHECK: vcompresspd %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0x8a,0xe0]
+          vcompresspd %ymm20, %ymm24
+
+// CHECK: vcompresspd %ymm20, %ymm24 {%k3}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2b,0x8a,0xe0]
+          vcompresspd %ymm20, %ymm24 {%k3}
+
+// CHECK: vcompresspd %ymm20, %ymm24 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xab,0x8a,0xe0]
+          vcompresspd %ymm20, %ymm24 {%k3} {z}
+
+// CHECK: vcompressps %xmm21, (%rcx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x29]
+          vcompressps %xmm21, (%rcx)
+
+// CHECK: vcompressps %xmm21, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0f,0x8a,0x29]
+          vcompressps %xmm21, (%rcx) {%k7}
+
+// CHECK: vcompressps %xmm21, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x8a,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcompressps %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vcompressps %xmm21, 508(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x6a,0x7f]
+          vcompressps %xmm21, 508(%rdx)
+
+// CHECK: vcompressps %xmm21, 512(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0xaa,0x00,0x02,0x00,0x00]
+          vcompressps %xmm21, 512(%rdx)
+
+// CHECK: vcompressps %xmm21, -512(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x6a,0x80]
+          vcompressps %xmm21, -512(%rdx)
+
+// CHECK: vcompressps %xmm21, -516(%rdx)
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x8a,0xaa,0xfc,0xfd,0xff,0xff]
+          vcompressps %xmm21, -516(%rdx)
+
+// CHECK: vcompressps %ymm24, (%rcx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x01]
+          vcompressps %ymm24, (%rcx)
+
+// CHECK: vcompressps %ymm24, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2f,0x8a,0x01]
+          vcompressps %ymm24, (%rcx) {%k7}
+
+// CHECK: vcompressps %ymm24, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x8a,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcompressps %ymm24, 291(%rax,%r14,8)
+
+// CHECK: vcompressps %ymm24, 508(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x42,0x7f]
+          vcompressps %ymm24, 508(%rdx)
+
+// CHECK: vcompressps %ymm24, 512(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x82,0x00,0x02,0x00,0x00]
+          vcompressps %ymm24, 512(%rdx)
+
+// CHECK: vcompressps %ymm24, -512(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x42,0x80]
+          vcompressps %ymm24, -512(%rdx)
+
+// CHECK: vcompressps %ymm24, -516(%rdx)
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x8a,0x82,0xfc,0xfd,0xff,0xff]
+          vcompressps %ymm24, -516(%rdx)
+
+// CHECK: vcompressps %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x8a,0xec]
+          vcompressps %xmm29, %xmm28
+
+// CHECK: vcompressps %xmm29, %xmm28 {%k3}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0b,0x8a,0xec]
+          vcompressps %xmm29, %xmm28 {%k3}
+
+// CHECK: vcompressps %xmm29, %xmm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8b,0x8a,0xec]
+          vcompressps %xmm29, %xmm28 {%k3} {z}
+
+// CHECK: vcompressps %ymm25, %ymm23
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x8a,0xcf]
+          vcompressps %ymm25, %ymm23
+
+// CHECK: vcompressps %ymm25, %ymm23 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2e,0x8a,0xcf]
+          vcompressps %ymm25, %ymm23 {%k6}
+
+// CHECK: vcompressps %ymm25, %ymm23 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xae,0x8a,0xcf]
+          vcompressps %ymm25, %ymm23 {%k6} {z}
+
+// CHECK: vexpandpd (%rcx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0x39]
+          vexpandpd (%rcx), %xmm23
+
+// CHECK: vexpandpd (%rcx), %xmm23 {%k3}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x0b,0x88,0x39]
+          vexpandpd (%rcx), %xmm23 {%k3}
+
+// CHECK: vexpandpd (%rcx), %xmm23 {%k3} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x8b,0x88,0x39]
+          vexpandpd (%rcx), %xmm23 {%k3} {z}
+
+// CHECK: vexpandpd 291(%rax,%r14,8), %xmm23
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x88,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vexpandpd 291(%rax,%r14,8), %xmm23
+
+// CHECK: vexpandpd 1016(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0x7a,0x7f]
+          vexpandpd 1016(%rdx), %xmm23
+
+// CHECK: vexpandpd 1024(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0xba,0x00,0x04,0x00,0x00]
+          vexpandpd 1024(%rdx), %xmm23
+
+// CHECK: vexpandpd -1024(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0x7a,0x80]
+          vexpandpd -1024(%rdx), %xmm23
+
+// CHECK: vexpandpd -1032(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x88,0xba,0xf8,0xfb,0xff,0xff]
+          vexpandpd -1032(%rdx), %xmm23
+
+// CHECK: vexpandpd (%rcx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0x31]
+          vexpandpd (%rcx), %ymm22
+
+// CHECK: vexpandpd (%rcx), %ymm22 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x2d,0x88,0x31]
+          vexpandpd (%rcx), %ymm22 {%k5}
+
+// CHECK: vexpandpd (%rcx), %ymm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0xad,0x88,0x31]
+          vexpandpd (%rcx), %ymm22 {%k5} {z}
+
+// CHECK: vexpandpd 291(%rax,%r14,8), %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x88,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vexpandpd 291(%rax,%r14,8), %ymm22
+
+// CHECK: vexpandpd 1016(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0x72,0x7f]
+          vexpandpd 1016(%rdx), %ymm22
+
+// CHECK: vexpandpd 1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0xb2,0x00,0x04,0x00,0x00]
+          vexpandpd 1024(%rdx), %ymm22
+
+// CHECK: vexpandpd -1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0x72,0x80]
+          vexpandpd -1024(%rdx), %ymm22
+
+// CHECK: vexpandpd -1032(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x88,0xb2,0xf8,0xfb,0xff,0xff]
+          vexpandpd -1032(%rdx), %ymm22
+
+// CHECK: vexpandpd %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xfd,0x08,0x88,0xe9]
+          vexpandpd %xmm25, %xmm29
+
+// CHECK: vexpandpd %xmm25, %xmm29 {%k7}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x0f,0x88,0xe9]
+          vexpandpd %xmm25, %xmm29 {%k7}
+
+// CHECK: vexpandpd %xmm25, %xmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x8f,0x88,0xe9]
+          vexpandpd %xmm25, %xmm29 {%k7} {z}
+
+// CHECK: vexpandpd %ymm27, %ymm21
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0x88,0xeb]
+          vexpandpd %ymm27, %ymm21
+
+// CHECK: vexpandpd %ymm27, %ymm21 {%k2}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2a,0x88,0xeb]
+          vexpandpd %ymm27, %ymm21 {%k2}
+
+// CHECK: vexpandpd %ymm27, %ymm21 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xaa,0x88,0xeb]
+          vexpandpd %ymm27, %ymm21 {%k2} {z}
+
+// CHECK: vexpandps (%rcx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x11]
+          vexpandps (%rcx), %xmm18
+
+// CHECK: vexpandps (%rcx), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x09,0x88,0x11]
+          vexpandps (%rcx), %xmm18 {%k1}
+
+// CHECK: vexpandps (%rcx), %xmm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x89,0x88,0x11]
+          vexpandps (%rcx), %xmm18 {%k1} {z}
+
+// CHECK: vexpandps 291(%rax,%r14,8), %xmm18
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x88,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vexpandps 291(%rax,%r14,8), %xmm18
+
+// CHECK: vexpandps 508(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x52,0x7f]
+          vexpandps 508(%rdx), %xmm18
+
+// CHECK: vexpandps 512(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x92,0x00,0x02,0x00,0x00]
+          vexpandps 512(%rdx), %xmm18
+
+// CHECK: vexpandps -512(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x52,0x80]
+          vexpandps -512(%rdx), %xmm18
+
+// CHECK: vexpandps -516(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x88,0x92,0xfc,0xfd,0xff,0xff]
+          vexpandps -516(%rdx), %xmm18
+
+// CHECK: vexpandps (%rcx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0x39]
+          vexpandps (%rcx), %ymm23
+
+// CHECK: vexpandps (%rcx), %ymm23 {%k7}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x2f,0x88,0x39]
+          vexpandps (%rcx), %ymm23 {%k7}
+
+// CHECK: vexpandps (%rcx), %ymm23 {%k7} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0xaf,0x88,0x39]
+          vexpandps (%rcx), %ymm23 {%k7} {z}
+
+// CHECK: vexpandps 291(%rax,%r14,8), %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x88,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vexpandps 291(%rax,%r14,8), %ymm23
+
+// CHECK: vexpandps 508(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0x7a,0x7f]
+          vexpandps 508(%rdx), %ymm23
+
+// CHECK: vexpandps 512(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0xba,0x00,0x02,0x00,0x00]
+          vexpandps 512(%rdx), %ymm23
+
+// CHECK: vexpandps -512(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0x7a,0x80]
+          vexpandps -512(%rdx), %ymm23
+
+// CHECK: vexpandps -516(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x88,0xba,0xfc,0xfd,0xff,0xff]
+          vexpandps -516(%rdx), %ymm23
+
+// CHECK: vexpandps %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x88,0xeb]
+          vexpandps %xmm19, %xmm29
+
+// CHECK: vexpandps %xmm19, %xmm29 {%k5}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0d,0x88,0xeb]
+          vexpandps %xmm19, %xmm29 {%k5}
+
+// CHECK: vexpandps %xmm19, %xmm29 {%k5} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8d,0x88,0xeb]
+          vexpandps %xmm19, %xmm29 {%k5} {z}
+
+// CHECK: vexpandps %ymm29, %ymm29
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x88,0xed]
+          vexpandps %ymm29, %ymm29
+
+// CHECK: vexpandps %ymm29, %ymm29 {%k5}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2d,0x88,0xed]
+          vexpandps %ymm29, %ymm29 {%k5}
+
+// CHECK: vexpandps %ymm29, %ymm29 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xad,0x88,0xed]
+          vexpandps %ymm29, %ymm29 {%k5} {z}
+
+// CHECK: vpabsd %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x1e,0xe3]
+          vpabsd %xmm19, %xmm28
+
+// CHECK: vpabsd %xmm19, %xmm28 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0e,0x1e,0xe3]
+          vpabsd %xmm19, %xmm28 {%k6}
+
+// CHECK: vpabsd %xmm19, %xmm28 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8e,0x1e,0xe3]
+          vpabsd %xmm19, %xmm28 {%k6} {z}
+
+// CHECK: vpabsd (%rcx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0x21]
+          vpabsd (%rcx), %xmm28
+
+// CHECK: vpabsd 291(%rax,%r14,8), %xmm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x1e,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpabsd 291(%rax,%r14,8), %xmm28
+
+// CHECK: vpabsd (%rcx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0x21]
+          vpabsd (%rcx){1to4}, %xmm28
+
+// CHECK: vpabsd 2032(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0x62,0x7f]
+          vpabsd 2032(%rdx), %xmm28
+
+// CHECK: vpabsd 2048(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0xa2,0x00,0x08,0x00,0x00]
+          vpabsd 2048(%rdx), %xmm28
+
+// CHECK: vpabsd -2048(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0x62,0x80]
+          vpabsd -2048(%rdx), %xmm28
+
+// CHECK: vpabsd -2064(%rdx), %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x1e,0xa2,0xf0,0xf7,0xff,0xff]
+          vpabsd -2064(%rdx), %xmm28
+
+// CHECK: vpabsd 508(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0x62,0x7f]
+          vpabsd 508(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd 512(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0xa2,0x00,0x02,0x00,0x00]
+          vpabsd 512(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd -512(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0x62,0x80]
+          vpabsd -512(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd -516(%rdx){1to4}, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x1e,0xa2,0xfc,0xfd,0xff,0xff]
+          vpabsd -516(%rdx){1to4}, %xmm28
+
+// CHECK: vpabsd %ymm18, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x1e,0xca]
+          vpabsd %ymm18, %ymm25
+
+// CHECK: vpabsd %ymm18, %ymm25 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2a,0x1e,0xca]
+          vpabsd %ymm18, %ymm25 {%k2}
+
+// CHECK: vpabsd %ymm18, %ymm25 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaa,0x1e,0xca]
+          vpabsd %ymm18, %ymm25 {%k2} {z}
+
+// CHECK: vpabsd (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x09]
+          vpabsd (%rcx), %ymm25
+
+// CHECK: vpabsd 291(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x1e,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpabsd 291(%rax,%r14,8), %ymm25
+
+// CHECK: vpabsd (%rcx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x09]
+          vpabsd (%rcx){1to8}, %ymm25
+
+// CHECK: vpabsd 4064(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x4a,0x7f]
+          vpabsd 4064(%rdx), %ymm25
+
+// CHECK: vpabsd 4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x8a,0x00,0x10,0x00,0x00]
+          vpabsd 4096(%rdx), %ymm25
+
+// CHECK: vpabsd -4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x4a,0x80]
+          vpabsd -4096(%rdx), %ymm25
+
+// CHECK: vpabsd -4128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x1e,0x8a,0xe0,0xef,0xff,0xff]
+          vpabsd -4128(%rdx), %ymm25
+
+// CHECK: vpabsd 508(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x4a,0x7f]
+          vpabsd 508(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsd 512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x8a,0x00,0x02,0x00,0x00]
+          vpabsd 512(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsd -512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x4a,0x80]
+          vpabsd -512(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsd -516(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x1e,0x8a,0xfc,0xfd,0xff,0xff]
+          vpabsd -516(%rdx){1to8}, %ymm25
+
+// CHECK: vpabsq %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x1f,0xde]
+          vpabsq %xmm22, %xmm19
+
+// CHECK: vpabsq %xmm22, %xmm19 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x0a,0x1f,0xde]
+          vpabsq %xmm22, %xmm19 {%k2}
+
+// CHECK: vpabsq %xmm22, %xmm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x8a,0x1f,0xde]
+          vpabsq %xmm22, %xmm19 {%k2} {z}
+
+// CHECK: vpabsq (%rcx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x19]
+          vpabsq (%rcx), %xmm19
+
+// CHECK: vpabsq 291(%rax,%r14,8), %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x1f,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpabsq 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpabsq (%rcx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x19]
+          vpabsq (%rcx){1to2}, %xmm19
+
+// CHECK: vpabsq 2032(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x5a,0x7f]
+          vpabsq 2032(%rdx), %xmm19
+
+// CHECK: vpabsq 2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x9a,0x00,0x08,0x00,0x00]
+          vpabsq 2048(%rdx), %xmm19
+
+// CHECK: vpabsq -2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x5a,0x80]
+          vpabsq -2048(%rdx), %xmm19
+
+// CHECK: vpabsq -2064(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x9a,0xf0,0xf7,0xff,0xff]
+          vpabsq -2064(%rdx), %xmm19
+
+// CHECK: vpabsq 1016(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x5a,0x7f]
+          vpabsq 1016(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq 1024(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x9a,0x00,0x04,0x00,0x00]
+          vpabsq 1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq -1024(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x5a,0x80]
+          vpabsq -1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq -1032(%rdx){1to2}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x9a,0xf8,0xfb,0xff,0xff]
+          vpabsq -1032(%rdx){1to2}, %xmm19
+
+// CHECK: vpabsq %ymm17, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x1f,0xf1]
+          vpabsq %ymm17, %ymm22
+
+// CHECK: vpabsq %ymm17, %ymm22 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x2e,0x1f,0xf1]
+          vpabsq %ymm17, %ymm22 {%k6}
+
+// CHECK: vpabsq %ymm17, %ymm22 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xae,0x1f,0xf1]
+          vpabsq %ymm17, %ymm22 {%k6} {z}
+
+// CHECK: vpabsq (%rcx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x31]
+          vpabsq (%rcx), %ymm22
+
+// CHECK: vpabsq 291(%rax,%r14,8), %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x1f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpabsq 291(%rax,%r14,8), %ymm22
+
+// CHECK: vpabsq (%rcx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x31]
+          vpabsq (%rcx){1to4}, %ymm22
+
+// CHECK: vpabsq 4064(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x72,0x7f]
+          vpabsq 4064(%rdx), %ymm22
+
+// CHECK: vpabsq 4096(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0xb2,0x00,0x10,0x00,0x00]
+          vpabsq 4096(%rdx), %ymm22
+
+// CHECK: vpabsq -4096(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x72,0x80]
+          vpabsq -4096(%rdx), %ymm22
+
+// CHECK: vpabsq -4128(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x1f,0xb2,0xe0,0xef,0xff,0xff]
+          vpabsq -4128(%rdx), %ymm22
+
+// CHECK: vpabsq 1016(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x72,0x7f]
+          vpabsq 1016(%rdx){1to4}, %ymm22
+
+// CHECK: vpabsq 1024(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0x00,0x04,0x00,0x00]
+          vpabsq 1024(%rdx){1to4}, %ymm22
+
+// CHECK: vpabsq -1024(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x72,0x80]
+          vpabsq -1024(%rdx){1to4}, %ymm22
+
+// CHECK: vpabsq -1032(%rdx){1to4}, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0xf8,0xfb,0xff,0xff]
+          vpabsq -1032(%rdx){1to4}, %ymm22
+
+// CHECK: vpgatherdd 123(%r14,%xmm31,8), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x90,0x8c,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherdd 123(%r14,%xmm31,8), %xmm17 {%k1}
+
+// CHECK: vpgatherdd 256(%r9,%xmm31), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x90,0x4c,0x39,0x40]
+          vpgatherdd 256(%r9,%xmm31), %xmm17 {%k1}
+
+// CHECK: vpgatherdd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x01,0x90,0x8c,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherdd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+
+// CHECK: vpgatherdd 123(%r14,%ymm31,8), %ymm19 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x21,0x90,0x9c,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherdd 123(%r14,%ymm31,8), %ymm19 {%k1}
+
+// CHECK: vpgatherdd 256(%r9,%ymm31), %ymm19 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x21,0x90,0x5c,0x39,0x40]
+          vpgatherdd 256(%r9,%ymm31), %ymm19 {%k1}
+
+// CHECK: vpgatherdd 1024(%rcx,%ymm31,4), %ymm19 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x21,0x90,0x9c,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherdd 1024(%rcx,%ymm31,4), %ymm19 {%k1}
+
+// CHECK: vpgatherdq 123(%r14,%xmm31,8), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x90,0x8c,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherdq 123(%r14,%xmm31,8), %xmm17 {%k1}
+
+// CHECK: vpgatherdq 256(%r9,%xmm31), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x90,0x4c,0x39,0x20]
+          vpgatherdq 256(%r9,%xmm31), %xmm17 {%k1}
+
+// CHECK: vpgatherdq 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x01,0x90,0x8c,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherdq 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+
+// CHECK: vpgatherdq 123(%r14,%xmm31,8), %ymm26 {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x21,0x90,0x94,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherdq 123(%r14,%xmm31,8), %ymm26 {%k1}
+
+// CHECK: vpgatherdq 256(%r9,%xmm31), %ymm26 {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x21,0x90,0x54,0x39,0x20]
+          vpgatherdq 256(%r9,%xmm31), %ymm26 {%k1}
+
+// CHECK: vpgatherdq 1024(%rcx,%xmm31,4), %ymm26 {%k1}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x21,0x90,0x94,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherdq 1024(%rcx,%xmm31,4), %ymm26 {%k1}
+
+// CHECK: vpgatherqd 123(%r14,%xmm31,8), %xmm21 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x91,0xac,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherqd 123(%r14,%xmm31,8), %xmm21 {%k1}
+
+// CHECK: vpgatherqd 256(%r9,%xmm31), %xmm21 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x91,0x6c,0x39,0x40]
+          vpgatherqd 256(%r9,%xmm31), %xmm21 {%k1}
+
+// CHECK: vpgatherqd 1024(%rcx,%xmm31,4), %xmm21 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x01,0x91,0xac,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherqd 1024(%rcx,%xmm31,4), %xmm21 {%k1}
+
+// CHECK: vpgatherqd 123(%r14,%ymm31,8), %xmm25 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0x91,0x8c,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherqd 123(%r14,%ymm31,8), %xmm25 {%k1}
+
+// CHECK: vpgatherqd 256(%r9,%ymm31), %xmm25 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0x91,0x4c,0x39,0x40]
+          vpgatherqd 256(%r9,%ymm31), %xmm25 {%k1}
+
+// CHECK: vpgatherqd 1024(%rcx,%ymm31,4), %xmm25 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x21,0x91,0x8c,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherqd 1024(%rcx,%ymm31,4), %xmm25 {%k1}
+
+// CHECK: vpgatherqq 123(%r14,%xmm31,8), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x91,0x94,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherqq 123(%r14,%xmm31,8), %xmm18 {%k1}
+
+// CHECK: vpgatherqq 256(%r9,%xmm31), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x91,0x54,0x39,0x20]
+          vpgatherqq 256(%r9,%xmm31), %xmm18 {%k1}
+
+// CHECK: vpgatherqq 1024(%rcx,%xmm31,4), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x01,0x91,0x94,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherqq 1024(%rcx,%xmm31,4), %xmm18 {%k1}
+
+// CHECK: vpgatherqq 123(%r14,%ymm31,8), %ymm19 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x21,0x91,0x9c,0xfe,0x7b,0x00,0x00,0x00]
+          vpgatherqq 123(%r14,%ymm31,8), %ymm19 {%k1}
+
+// CHECK: vpgatherqq 256(%r9,%ymm31), %ymm19 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x21,0x91,0x5c,0x39,0x20]
+          vpgatherqq 256(%r9,%ymm31), %ymm19 {%k1}
+
+// CHECK: vpgatherqq 1024(%rcx,%ymm31,4), %ymm19 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x21,0x91,0x9c,0xb9,0x00,0x04,0x00,0x00]
+          vpgatherqq 1024(%rcx,%ymm31,4), %ymm19 {%k1}
+
+// CHECK: vgatherdpd 123(%r14,%xmm31,8), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x92,0x8c,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherdpd 123(%r14,%xmm31,8), %xmm17 {%k1}
+
+// CHECK: vgatherdpd 256(%r9,%xmm31), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x92,0x4c,0x39,0x20]
+          vgatherdpd 256(%r9,%xmm31), %xmm17 {%k1}
+
+// CHECK: vgatherdpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x01,0x92,0x8c,0xb9,0x00,0x04,0x00,0x00]
+          vgatherdpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+
+// CHECK: vgatherdpd 123(%r14,%xmm31,8), %ymm23 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x21,0x92,0xbc,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherdpd 123(%r14,%xmm31,8), %ymm23 {%k1}
+
+// CHECK: vgatherdpd 256(%r9,%xmm31), %ymm23 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x21,0x92,0x7c,0x39,0x20]
+          vgatherdpd 256(%r9,%xmm31), %ymm23 {%k1}
+
+// CHECK: vgatherdpd 1024(%rcx,%xmm31,4), %ymm23 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x21,0x92,0xbc,0xb9,0x00,0x04,0x00,0x00]
+          vgatherdpd 1024(%rcx,%xmm31,4), %ymm23 {%k1}
+
+// CHECK: vgatherdps 123(%r14,%xmm31,8), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x92,0x94,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherdps 123(%r14,%xmm31,8), %xmm18 {%k1}
+
+// CHECK: vgatherdps 256(%r9,%xmm31), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x92,0x54,0x39,0x40]
+          vgatherdps 256(%r9,%xmm31), %xmm18 {%k1}
+
+// CHECK: vgatherdps 1024(%rcx,%xmm31,4), %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x01,0x92,0x94,0xb9,0x00,0x04,0x00,0x00]
+          vgatherdps 1024(%rcx,%xmm31,4), %xmm18 {%k1}
+
+// CHECK: vgatherdps 123(%r14,%ymm31,8), %ymm27 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0x92,0x9c,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherdps 123(%r14,%ymm31,8), %ymm27 {%k1}
+
+// CHECK: vgatherdps 256(%r9,%ymm31), %ymm27 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0x92,0x5c,0x39,0x40]
+          vgatherdps 256(%r9,%ymm31), %ymm27 {%k1}
+
+// CHECK: vgatherdps 1024(%rcx,%ymm31,4), %ymm27 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x21,0x92,0x9c,0xb9,0x00,0x04,0x00,0x00]
+          vgatherdps 1024(%rcx,%ymm31,4), %ymm27 {%k1}
+
+// CHECK: vgatherqpd 123(%r14,%xmm31,8), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x93,0x8c,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherqpd 123(%r14,%xmm31,8), %xmm17 {%k1}
+
+// CHECK: vgatherqpd 256(%r9,%xmm31), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0x93,0x4c,0x39,0x20]
+          vgatherqpd 256(%r9,%xmm31), %xmm17 {%k1}
+
+// CHECK: vgatherqpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x01,0x93,0x8c,0xb9,0x00,0x04,0x00,0x00]
+          vgatherqpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
+
+// CHECK: vgatherqpd 123(%r14,%ymm31,8), %ymm29 {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x21,0x93,0xac,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherqpd 123(%r14,%ymm31,8), %ymm29 {%k1}
+
+// CHECK: vgatherqpd 256(%r9,%ymm31), %ymm29 {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x21,0x93,0x6c,0x39,0x20]
+          vgatherqpd 256(%r9,%ymm31), %ymm29 {%k1}
+
+// CHECK: vgatherqpd 1024(%rcx,%ymm31,4), %ymm29 {%k1}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x21,0x93,0xac,0xb9,0x00,0x04,0x00,0x00]
+          vgatherqpd 1024(%rcx,%ymm31,4), %ymm29 {%k1}
+
+// CHECK: vgatherqps 123(%r14,%xmm31,8), %xmm21 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x93,0xac,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherqps 123(%r14,%xmm31,8), %xmm21 {%k1}
+
+// CHECK: vgatherqps 256(%r9,%xmm31), %xmm21 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0x93,0x6c,0x39,0x40]
+          vgatherqps 256(%r9,%xmm31), %xmm21 {%k1}
+
+// CHECK: vgatherqps 1024(%rcx,%xmm31,4), %xmm21 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x01,0x93,0xac,0xb9,0x00,0x04,0x00,0x00]
+          vgatherqps 1024(%rcx,%xmm31,4), %xmm21 {%k1}
+
+// CHECK: vgatherqps 123(%r14,%ymm31,8), %xmm19 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x21,0x93,0x9c,0xfe,0x7b,0x00,0x00,0x00]
+          vgatherqps 123(%r14,%ymm31,8), %xmm19 {%k1}
+
+// CHECK: vgatherqps 256(%r9,%ymm31), %xmm19 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x21,0x93,0x5c,0x39,0x40]
+          vgatherqps 256(%r9,%ymm31), %xmm19 {%k1}
+
+// CHECK: vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x21,0x93,0x9c,0xb9,0x00,0x04,0x00,0x00]
+          vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}
+
+// CHECK: vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdd %xmm20, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterdd %xmm20, 256(%r9,%xmm31) {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0xa0,0x64,0x39,0x40]
+          vpscatterdd %xmm20, 256(%r9,%xmm31) {%k1}
+
+// CHECK: vpscatterdd %xmm20, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x01,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterdd %xmm20, 1024(%rcx,%xmm31,4) {%k1}
+
+// CHECK: vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
+
+// CHECK: vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdd %ymm28, 123(%r14,%ymm31,8) {%k1}
+
+// CHECK: vpscatterdd %ymm28, 256(%r9,%ymm31) {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0xa0,0x64,0x39,0x40]
+          vpscatterdd %ymm28, 256(%r9,%ymm31) {%k1}
+
+// CHECK: vpscatterdd %ymm28, 1024(%rcx,%ymm31,4) {%k1}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x21,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterdd %ymm28, 1024(%rcx,%ymm31,4) {%k1}
+
+// CHECK: vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0xa0,0xac,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0xa0,0xac,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdq %xmm21, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterdq %xmm21, 256(%r9,%xmm31) {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x01,0xa0,0x6c,0x39,0x20]
+          vpscatterdq %xmm21, 256(%r9,%xmm31) {%k1}
+
+// CHECK: vpscatterdq %xmm21, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x01,0xa0,0xac,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterdq %xmm21, 1024(%rcx,%xmm31,4) {%k1}
+
+// CHECK: vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x21,0xa0,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterdq %ymm28, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterdq %ymm28, 256(%r9,%xmm31) {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x21,0xa0,0x64,0x39,0x20]
+          vpscatterdq %ymm28, 256(%r9,%xmm31) {%k1}
+
+// CHECK: vpscatterdq %ymm28, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x21,0xa0,0xa4,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterdq %ymm28, 1024(%rcx,%xmm31,4) {%k1}
+
+// CHECK: vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0xa1,0xb4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0xa1,0xb4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqd %xmm22, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterqd %xmm22, 256(%r9,%xmm31) {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x01,0xa1,0x74,0x39,0x40]
+          vpscatterqd %xmm22, 256(%r9,%xmm31) {%k1}
+
+// CHECK: vpscatterqd %xmm22, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x01,0xa1,0xb4,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterqd %xmm22, 1024(%rcx,%xmm31,4) {%k1}
+
+// CHECK: vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0xa1,0x84,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
+
+// CHECK: vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0xa1,0x84,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqd %xmm24, 123(%r14,%ymm31,8) {%k1}
+
+// CHECK: vpscatterqd %xmm24, 256(%r9,%ymm31) {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x21,0xa1,0x44,0x39,0x40]
+          vpscatterqd %xmm24, 256(%r9,%ymm31) {%k1}
+
+// CHECK: vpscatterqd %xmm24, 1024(%rcx,%ymm31,4) {%k1}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x21,0xa1,0x84,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterqd %xmm24, 1024(%rcx,%ymm31,4) {%k1}
+
+// CHECK: vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x01,0xa1,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x01,0xa1,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqq %xmm28, 123(%r14,%xmm31,8) {%k1}
+
+// CHECK: vpscatterqq %xmm28, 256(%r9,%xmm31) {%k1}
+// CHECK:  encoding: [0x62,0x02,0xfd,0x01,0xa1,0x64,0x39,0x20]
+          vpscatterqq %xmm28, 256(%r9,%xmm31) {%k1}
+
+// CHECK: vpscatterqq %xmm28, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK:  encoding: [0x62,0x22,0xfd,0x01,0xa1,0xa4,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterqq %xmm28, 1024(%rcx,%xmm31,4) {%k1}
+
+// CHECK: vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x21,0xa1,0x9c,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
+
+// CHECK: vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x21,0xa1,0x9c,0xfe,0x7b,0x00,0x00,0x00]
+          vpscatterqq %ymm19, 123(%r14,%ymm31,8) {%k1}
+
+// CHECK: vpscatterqq %ymm19, 256(%r9,%ymm31) {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x21,0xa1,0x5c,0x39,0x20]
+          vpscatterqq %ymm19, 256(%r9,%ymm31) {%k1}
+
+// CHECK: vpscatterqq %ymm19, 1024(%rcx,%ymm31,4) {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x21,0xa1,0x9c,0xb9,0x00,0x04,0x00,0x00]
+          vpscatterqq %ymm19, 1024(%rcx,%ymm31,4) {%k1}
diff --git a/test/MC/X86/faultmap-section-parsing.s b/test/MC/X86/faultmap-section-parsing.s
new file mode 100644
index 0000000..758e70f
--- /dev/null
+++ b/test/MC/X86/faultmap-section-parsing.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc < %s -triple=x86_64-apple-macosx -filetype=obj -o - | llvm-objdump -fault-map-section - | FileCheck %s
+
+	.section	__LLVM_FAULTMAPS,__llvm_faultmaps
+__LLVM_FaultMaps:
+	.byte	1
+	.byte	0
+	.short	0
+	.long	2
+	.quad	0xFFDEAD
+	.long	1
+	.long	0
+	.long	1
+	.long	100
+	.long	200
+
+	.quad	0xFFDAED
+	.long	1
+	.long	0
+	.long	1
+	.long	400
+	.long	500
+
+// CHECK: FaultMap table:
+// CHECK-NEXT: Version: 0x1
+// CHECK-NEXT: NumFunctions: 2
+// CHECK-NEXT: FunctionAddress: 0xffdead, NumFaultingPCs: 1
+// CHECK-NEXT: Fault kind: FaultingLoad, faulting PC offset: 100, handling PC offset: 200
+// CHECK-NEXT: FunctionAddress: 0xffdaed, NumFaultingPCs: 1
+// CHECK-NEXT: Fault kind: FaultingLoad, faulting PC offset: 400, handling PC offset: 500
diff --git a/test/MC/X86/inline-asm-obj.ll b/test/MC/X86/inline-asm-obj.ll
new file mode 100644
index 0000000..2ee998d
--- /dev/null
+++ b/test/MC/X86/inline-asm-obj.ll
@@ -0,0 +1,13 @@
+; RUN: llc %s -o - | llvm-mc -triple=x86_64-pc-linux -o %t1 -filetype=obj
+; RUN: llc %s -o %t2 -filetype=obj
+; RUN: cmp %t1 %t2
+
+; Test that we can handle inline assembly referring to a temporary label.
+; We crashed when using direct object emission in the past.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fj()  {
+  call void asm "bsr $0,%eax", "o"(i32 1)
+  ret void
+}
diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s
index 45e7463..fc6df8c 100644
--- a/test/MC/X86/x86-64-avx512bw.s
+++ b/test/MC/X86/x86-64-avx512bw.s
@@ -3560,3 +3560,110 @@
 // CHECK:  encoding: [0x62,0x61,0x15,0x40,0xe3,0xaa,0xc0,0xdf,0xff,0xff]
           vpavgw -8256(%rdx), %zmm29, %zmm29
 
+// CHECK: vpshufb %zmm20, %zmm26, %zmm22
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x40,0x00,0xf4]
+          vpshufb %zmm20, %zmm26, %zmm22
+
+// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x47,0x00,0xf4]
+          vpshufb %zmm20, %zmm26, %zmm22 {%k7}
+
+// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x2d,0xc7,0x00,0xf4]
+          vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
+
+// CHECK: vpshufb (%rcx), %zmm26, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x40,0x00,0x31]
+          vpshufb (%rcx), %zmm26, %zmm22
+
+// CHECK: vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x40,0x00,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
+
+// CHECK: vpshufb 8128(%rdx), %zmm26, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x7f]
+          vpshufb 8128(%rdx), %zmm26, %zmm22
+
+// CHECK: vpshufb 8192(%rdx), %zmm26, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0x00,0x20,0x00,0x00]
+          vpshufb 8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpshufb -8192(%rdx), %zmm26, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x80]
+          vpshufb -8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpshufb -8256(%rdx), %zmm26, %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0xc0,0xdf,0xff,0xff]
+          vpshufb -8256(%rdx), %zmm26, %zmm22
+
+// CHECK: vpabsb %zmm27, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x7d,0x48,0x1c,0xcb]
+          vpabsb %zmm27, %zmm17
+
+// CHECK: vpabsb %zmm27, %zmm17 {%k7}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x4f,0x1c,0xcb]
+          vpabsb %zmm27, %zmm17 {%k7}
+
+// CHECK: vpabsb %zmm27, %zmm17 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xcf,0x1c,0xcb]
+          vpabsb %zmm27, %zmm17 {%k7} {z}
+
+// CHECK: vpabsb (%rcx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x09]
+          vpabsb (%rcx), %zmm17
+
+// CHECK: vpabsb 291(%rax,%r14,8), %zmm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x1c,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpabsb 291(%rax,%r14,8), %zmm17
+
+// CHECK: vpabsb 8128(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x4a,0x7f]
+          vpabsb 8128(%rdx), %zmm17
+
+// CHECK: vpabsb 8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x8a,0x00,0x20,0x00,0x00]
+          vpabsb 8192(%rdx), %zmm17
+
+// CHECK: vpabsb -8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x4a,0x80]
+          vpabsb -8192(%rdx), %zmm17
+
+// CHECK: vpabsb -8256(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x8a,0xc0,0xdf,0xff,0xff]
+          vpabsb -8256(%rdx), %zmm17
+
+// CHECK: vpabsw %zmm24, %zmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x48,0x1d,0xf0]
+          vpabsw %zmm24, %zmm30
+
+// CHECK: vpabsw %zmm24, %zmm30 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x4e,0x1d,0xf0]
+          vpabsw %zmm24, %zmm30 {%k6}
+
+// CHECK: vpabsw %zmm24, %zmm30 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xce,0x1d,0xf0]
+          vpabsw %zmm24, %zmm30 {%k6} {z}
+
+// CHECK: vpabsw (%rcx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0x31]
+          vpabsw (%rcx), %zmm30
+
+// CHECK: vpabsw 291(%rax,%r14,8), %zmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x1d,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpabsw 291(%rax,%r14,8), %zmm30
+
+// CHECK: vpabsw 8128(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0x72,0x7f]
+          vpabsw 8128(%rdx), %zmm30
+
+// CHECK: vpabsw 8192(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0x00,0x20,0x00,0x00]
+          vpabsw 8192(%rdx), %zmm30
+
+// CHECK: vpabsw -8192(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0x72,0x80]
+          vpabsw -8192(%rdx), %zmm30
+
+// CHECK: vpabsw -8256(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff]
+          vpabsw -8256(%rdx), %zmm30
diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s
index 991c610..14a87df 100644
--- a/test/MC/X86/x86-64-avx512bw_vl.s
+++ b/test/MC/X86/x86-64-avx512bw_vl.s
@@ -6510,3 +6510,76 @@
 // CHECK: vpavgw -4128(%rdx), %ymm23, %ymm21
 // CHECK:  encoding: [0x62,0xe1,0x45,0x20,0xe3,0xaa,0xe0,0xef,0xff,0xff]
           vpavgw -4128(%rdx), %ymm23, %ymm21
+
+// CHECK: vpshufb %xmm27, %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0x82,0x3d,0x00,0x00,0xfb]
+          vpshufb %xmm27, %xmm24, %xmm23
+
+// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4}
+// CHECK:  encoding: [0x62,0x82,0x3d,0x04,0x00,0xfb]
+          vpshufb %xmm27, %xmm24, %xmm23 {%k4}
+
+// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
+// CHECK:  encoding: [0x62,0x82,0x3d,0x84,0x00,0xfb]
+          vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
+
+// CHECK: vpshufb (%rcx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x00,0x39]
+          vpshufb (%rcx), %xmm24, %xmm23
+
+// CHECK: vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x00,0x00,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
+
+// CHECK: vpshufb 2032(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x7f]
+          vpshufb 2032(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb 2048(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0x00,0x08,0x00,0x00]
+          vpshufb 2048(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb -2048(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x80]
+          vpshufb -2048(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb -2064(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0xf0,0xf7,0xff,0xff]
+          vpshufb -2064(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb %ymm17, %ymm18, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x20,0x00,0xd9]
+          vpshufb %ymm17, %ymm18, %ymm19
+
+// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x24,0x00,0xd9]
+          vpshufb %ymm17, %ymm18, %ymm19 {%k4}
+
+// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0x6d,0xa4,0x00,0xd9]
+          vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
+
+// CHECK: vpshufb (%rcx), %ymm18, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x20,0x00,0x19]
+          vpshufb (%rcx), %ymm18, %ymm19
+
+// CHECK: vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x20,0x00,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
+
+// CHECK: vpshufb 4064(%rdx), %ymm18, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x7f]
+          vpshufb 4064(%rdx), %ymm18, %ymm19
+
+// CHECK: vpshufb 4096(%rdx), %ymm18, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0x00,0x10,0x00,0x00]
+          vpshufb 4096(%rdx), %ymm18, %ymm19
+
+// CHECK: vpshufb -4096(%rdx), %ymm18, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x80]
+          vpshufb -4096(%rdx), %ymm18, %ymm19
+
+// CHECK: vpshufb -4128(%rdx), %ymm18, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff]
+          vpshufb -4128(%rdx), %ymm18, %ymm19
+
diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s
index 1381b2e..c587f8a 100644
--- a/test/MC/X86/x86-64-avx512f_vl.s
+++ b/test/MC/X86/x86-64-avx512f_vl.s
@@ -11133,6 +11133,4038 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1
 // CHECK:  encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
           valignq $0x7b, -1032(%rdx){1to4}, %ymm24, %ymm25
 
+// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x35,0x00,0x98,0xeb]
+          vfmadd132ps %xmm19, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x35,0x04,0x98,0xeb]
+          vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4}
+
+// CHECK: vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x35,0x84,0x98,0xeb]
+          vfmadd132ps %xmm19, %xmm25, %xmm29 {%k4} {z}
+
+// CHECK: vfmadd132ps (%rcx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x00,0x98,0x29]
+          vfmadd132ps (%rcx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 291(%rax,%r14,8), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x35,0x00,0x98,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132ps 291(%rax,%r14,8), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps (%rcx){1to4}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x10,0x98,0x29]
+          vfmadd132ps (%rcx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 2032(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x00,0x98,0x6a,0x7f]
+          vfmadd132ps 2032(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 2048(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x00,0x98,0xaa,0x00,0x08,0x00,0x00]
+          vfmadd132ps 2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -2048(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x00,0x98,0x6a,0x80]
+          vfmadd132ps -2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -2064(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x00,0x98,0xaa,0xf0,0xf7,0xff,0xff]
+          vfmadd132ps -2064(%rdx), %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 508(%rdx){1to4}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x10,0x98,0x6a,0x7f]
+          vfmadd132ps 508(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps 512(%rdx){1to4}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x10,0x98,0xaa,0x00,0x02,0x00,0x00]
+          vfmadd132ps 512(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -512(%rdx){1to4}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x10,0x98,0x6a,0x80]
+          vfmadd132ps -512(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps -516(%rdx){1to4}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x35,0x10,0x98,0xaa,0xfc,0xfd,0xff,0xff]
+          vfmadd132ps -516(%rdx){1to4}, %xmm25, %xmm29
+
+// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0x82,0x4d,0x20,0x98,0xfa]
+          vfmadd132ps %ymm26, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5}
+// CHECK:  encoding: [0x62,0x82,0x4d,0x25,0x98,0xfa]
+          vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5}
+
+// CHECK: vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} {z}
+// CHECK:  encoding: [0x62,0x82,0x4d,0xa5,0x98,0xfa]
+          vfmadd132ps %ymm26, %ymm22, %ymm23 {%k5} {z}
+
+// CHECK: vfmadd132ps (%rcx), %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x98,0x39]
+          vfmadd132ps (%rcx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 291(%rax,%r14,8), %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x20,0x98,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132ps 291(%rax,%r14,8), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps (%rcx){1to8}, %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x98,0x39]
+          vfmadd132ps (%rcx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 4064(%rdx), %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x98,0x7a,0x7f]
+          vfmadd132ps 4064(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 4096(%rdx), %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x98,0xba,0x00,0x10,0x00,0x00]
+          vfmadd132ps 4096(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -4096(%rdx), %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x98,0x7a,0x80]
+          vfmadd132ps -4096(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -4128(%rdx), %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x98,0xba,0xe0,0xef,0xff,0xff]
+          vfmadd132ps -4128(%rdx), %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 508(%rdx){1to8}, %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x98,0x7a,0x7f]
+          vfmadd132ps 508(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps 512(%rdx){1to8}, %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x98,0xba,0x00,0x02,0x00,0x00]
+          vfmadd132ps 512(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -512(%rdx){1to8}, %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x98,0x7a,0x80]
+          vfmadd132ps -512(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132ps -516(%rdx){1to8}, %ymm22, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x98,0xba,0xfc,0xfd,0xff,0xff]
+          vfmadd132ps -516(%rdx){1to8}, %ymm22, %ymm23
+
+// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x02,0xd5,0x00,0x98,0xe3]
+          vfmadd132pd %xmm27, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1}
+// CHECK:  encoding: [0x62,0x02,0xd5,0x01,0x98,0xe3]
+          vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1}
+
+// CHECK: vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} {z}
+// CHECK:  encoding: [0x62,0x02,0xd5,0x81,0x98,0xe3]
+          vfmadd132pd %xmm27, %xmm21, %xmm28 {%k1} {z}
+
+// CHECK: vfmadd132pd (%rcx), %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x98,0x21]
+          vfmadd132pd (%rcx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 291(%rax,%r14,8), %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x22,0xd5,0x00,0x98,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132pd 291(%rax,%r14,8), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd (%rcx){1to2}, %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x98,0x21]
+          vfmadd132pd (%rcx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 2032(%rdx), %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x98,0x62,0x7f]
+          vfmadd132pd 2032(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 2048(%rdx), %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x98,0xa2,0x00,0x08,0x00,0x00]
+          vfmadd132pd 2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -2048(%rdx), %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x98,0x62,0x80]
+          vfmadd132pd -2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -2064(%rdx), %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x98,0xa2,0xf0,0xf7,0xff,0xff]
+          vfmadd132pd -2064(%rdx), %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x98,0x62,0x7f]
+          vfmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x98,0xa2,0x00,0x04,0x00,0x00]
+          vfmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x98,0x62,0x80]
+          vfmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x98,0xa2,0xf8,0xfb,0xff,0xff]
+          vfmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm28
+
+// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0x82,0xbd,0x20,0x98,0xf3]
+          vfmadd132pd %ymm27, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7}
+// CHECK:  encoding: [0x62,0x82,0xbd,0x27,0x98,0xf3]
+          vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7}
+
+// CHECK: vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0xbd,0xa7,0x98,0xf3]
+          vfmadd132pd %ymm27, %ymm24, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd132pd (%rcx), %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x20,0x98,0x31]
+          vfmadd132pd (%rcx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 291(%rax,%r14,8), %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x20,0x98,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd132pd 291(%rax,%r14,8), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd (%rcx){1to4}, %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x30,0x98,0x31]
+          vfmadd132pd (%rcx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 4064(%rdx), %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x20,0x98,0x72,0x7f]
+          vfmadd132pd 4064(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 4096(%rdx), %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x20,0x98,0xb2,0x00,0x10,0x00,0x00]
+          vfmadd132pd 4096(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -4096(%rdx), %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x20,0x98,0x72,0x80]
+          vfmadd132pd -4096(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -4128(%rdx), %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x20,0x98,0xb2,0xe0,0xef,0xff,0xff]
+          vfmadd132pd -4128(%rdx), %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 1016(%rdx){1to4}, %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x30,0x98,0x72,0x7f]
+          vfmadd132pd 1016(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd 1024(%rdx){1to4}, %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x30,0x98,0xb2,0x00,0x04,0x00,0x00]
+          vfmadd132pd 1024(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -1024(%rdx){1to4}, %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x30,0x98,0x72,0x80]
+          vfmadd132pd -1024(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd132pd -1032(%rdx){1to4}, %ymm24, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x30,0x98,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmadd132pd -1032(%rdx){1to4}, %ymm24, %ymm22
+
+// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x02,0x1d,0x00,0xa8,0xc4]
+          vfmadd213ps %xmm28, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x1d,0x01,0xa8,0xc4]
+          vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1}
+
+// CHECK: vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} {z}
+// CHECK:  encoding: [0x62,0x02,0x1d,0x81,0xa8,0xc4]
+          vfmadd213ps %xmm28, %xmm28, %xmm24 {%k1} {z}
+
+// CHECK: vfmadd213ps (%rcx), %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xa8,0x01]
+          vfmadd213ps (%rcx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 291(%rax,%r14,8), %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x1d,0x00,0xa8,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213ps 291(%rax,%r14,8), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps (%rcx){1to4}, %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xa8,0x01]
+          vfmadd213ps (%rcx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 2032(%rdx), %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xa8,0x42,0x7f]
+          vfmadd213ps 2032(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 2048(%rdx), %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xa8,0x82,0x00,0x08,0x00,0x00]
+          vfmadd213ps 2048(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -2048(%rdx), %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xa8,0x42,0x80]
+          vfmadd213ps -2048(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -2064(%rdx), %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xa8,0x82,0xf0,0xf7,0xff,0xff]
+          vfmadd213ps -2064(%rdx), %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 508(%rdx){1to4}, %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xa8,0x42,0x7f]
+          vfmadd213ps 508(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps 512(%rdx){1to4}, %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xa8,0x82,0x00,0x02,0x00,0x00]
+          vfmadd213ps 512(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -512(%rdx){1to4}, %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xa8,0x42,0x80]
+          vfmadd213ps -512(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps -516(%rdx){1to4}, %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xa8,0x82,0xfc,0xfd,0xff,0xff]
+          vfmadd213ps -516(%rdx){1to4}, %xmm28, %xmm24
+
+// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x22,0x4d,0x20,0xa8,0xd1]
+          vfmadd213ps %ymm17, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3}
+// CHECK:  encoding: [0x62,0x22,0x4d,0x23,0xa8,0xd1]
+          vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3}
+
+// CHECK: vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0x4d,0xa3,0xa8,0xd1]
+          vfmadd213ps %ymm17, %ymm22, %ymm26 {%k3} {z}
+
+// CHECK: vfmadd213ps (%rcx), %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0xa8,0x11]
+          vfmadd213ps (%rcx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 291(%rax,%r14,8), %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x22,0x4d,0x20,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213ps 291(%rax,%r14,8), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps (%rcx){1to8}, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0xa8,0x11]
+          vfmadd213ps (%rcx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 4064(%rdx), %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0xa8,0x52,0x7f]
+          vfmadd213ps 4064(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 4096(%rdx), %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0xa8,0x92,0x00,0x10,0x00,0x00]
+          vfmadd213ps 4096(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -4096(%rdx), %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0xa8,0x52,0x80]
+          vfmadd213ps -4096(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -4128(%rdx), %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0xa8,0x92,0xe0,0xef,0xff,0xff]
+          vfmadd213ps -4128(%rdx), %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 508(%rdx){1to8}, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0xa8,0x52,0x7f]
+          vfmadd213ps 508(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps 512(%rdx){1to8}, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0xa8,0x92,0x00,0x02,0x00,0x00]
+          vfmadd213ps 512(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -512(%rdx){1to8}, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0xa8,0x52,0x80]
+          vfmadd213ps -512(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213ps -516(%rdx){1to8}, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0xa8,0x92,0xfc,0xfd,0xff,0xff]
+          vfmadd213ps -516(%rdx){1to8}, %ymm22, %ymm26
+
+// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x00,0xa8,0xf7]
+          vfmadd213pd %xmm23, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x04,0xa8,0xf7]
+          vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4}
+
+// CHECK: vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x84,0xa8,0xf7]
+          vfmadd213pd %xmm23, %xmm21, %xmm22 {%k4} {z}
+
+// CHECK: vfmadd213pd (%rcx), %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x31]
+          vfmadd213pd (%rcx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 291(%rax,%r14,8), %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x00,0xa8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213pd 291(%rax,%r14,8), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd (%rcx){1to2}, %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x31]
+          vfmadd213pd (%rcx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 2032(%rdx), %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x72,0x7f]
+          vfmadd213pd 2032(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 2048(%rdx), %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa8,0xb2,0x00,0x08,0x00,0x00]
+          vfmadd213pd 2048(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -2048(%rdx), %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa8,0x72,0x80]
+          vfmadd213pd -2048(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -2064(%rdx), %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa8,0xb2,0xf0,0xf7,0xff,0xff]
+          vfmadd213pd -2064(%rdx), %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 1016(%rdx){1to2}, %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x72,0x7f]
+          vfmadd213pd 1016(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd 1024(%rdx){1to2}, %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa8,0xb2,0x00,0x04,0x00,0x00]
+          vfmadd213pd 1024(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -1024(%rdx){1to2}, %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa8,0x72,0x80]
+          vfmadd213pd -1024(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd -1032(%rdx){1to2}, %xmm21, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa8,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmadd213pd -1032(%rdx){1to2}, %xmm21, %xmm22
+
+// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x20,0xa8,0xd1]
+          vfmadd213pd %ymm17, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x21,0xa8,0xd1]
+          vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1}
+
+// CHECK: vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0xe5,0xa1,0xa8,0xd1]
+          vfmadd213pd %ymm17, %ymm19, %ymm18 {%k1} {z}
+
+// CHECK: vfmadd213pd (%rcx), %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x11]
+          vfmadd213pd (%rcx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 291(%rax,%r14,8), %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x20,0xa8,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd213pd 291(%rax,%r14,8), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd (%rcx){1to4}, %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x11]
+          vfmadd213pd (%rcx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 4064(%rdx), %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x52,0x7f]
+          vfmadd213pd 4064(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 4096(%rdx), %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x92,0x00,0x10,0x00,0x00]
+          vfmadd213pd 4096(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -4096(%rdx), %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x52,0x80]
+          vfmadd213pd -4096(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -4128(%rdx), %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x20,0xa8,0x92,0xe0,0xef,0xff,0xff]
+          vfmadd213pd -4128(%rdx), %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 1016(%rdx){1to4}, %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x52,0x7f]
+          vfmadd213pd 1016(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd 1024(%rdx){1to4}, %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x92,0x00,0x04,0x00,0x00]
+          vfmadd213pd 1024(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -1024(%rdx){1to4}, %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x52,0x80]
+          vfmadd213pd -1024(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd213pd -1032(%rdx){1to4}, %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x30,0xa8,0x92,0xf8,0xfb,0xff,0xff]
+          vfmadd213pd -1032(%rdx){1to4}, %ymm19, %ymm18
+
+// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x5d,0x00,0xb8,0xf3]
+          vfmadd231ps %xmm27, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7}
+// CHECK:  encoding: [0x62,0x02,0x5d,0x07,0xb8,0xf3]
+          vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7}
+
+// CHECK: vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0x5d,0x87,0xb8,0xf3]
+          vfmadd231ps %xmm27, %xmm20, %xmm30 {%k7} {z}
+
+// CHECK: vfmadd231ps (%rcx), %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0xb8,0x31]
+          vfmadd231ps (%rcx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 291(%rax,%r14,8), %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x22,0x5d,0x00,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231ps 291(%rax,%r14,8), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps (%rcx){1to4}, %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0xb8,0x31]
+          vfmadd231ps (%rcx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 2032(%rdx), %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0xb8,0x72,0x7f]
+          vfmadd231ps 2032(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 2048(%rdx), %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0xb8,0xb2,0x00,0x08,0x00,0x00]
+          vfmadd231ps 2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -2048(%rdx), %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0xb8,0x72,0x80]
+          vfmadd231ps -2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -2064(%rdx), %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0xb8,0xb2,0xf0,0xf7,0xff,0xff]
+          vfmadd231ps -2064(%rdx), %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 508(%rdx){1to4}, %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0xb8,0x72,0x7f]
+          vfmadd231ps 508(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps 512(%rdx){1to4}, %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0xb8,0xb2,0x00,0x02,0x00,0x00]
+          vfmadd231ps 512(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -512(%rdx){1to4}, %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0xb8,0x72,0x80]
+          vfmadd231ps -512(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps -516(%rdx){1to4}, %xmm20, %xmm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0xb8,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmadd231ps -516(%rdx){1to4}, %xmm20, %xmm30
+
+// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0x82,0x2d,0x20,0xb8,0xf1]
+          vfmadd231ps %ymm25, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7}
+// CHECK:  encoding: [0x62,0x82,0x2d,0x27,0xb8,0xf1]
+          vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7}
+
+// CHECK: vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0x2d,0xa7,0xb8,0xf1]
+          vfmadd231ps %ymm25, %ymm26, %ymm22 {%k7} {z}
+
+// CHECK: vfmadd231ps (%rcx), %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x31]
+          vfmadd231ps (%rcx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 291(%rax,%r14,8), %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x20,0xb8,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231ps 291(%rax,%r14,8), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps (%rcx){1to8}, %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x31]
+          vfmadd231ps (%rcx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 4064(%rdx), %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x72,0x7f]
+          vfmadd231ps 4064(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 4096(%rdx), %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xb8,0xb2,0x00,0x10,0x00,0x00]
+          vfmadd231ps 4096(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -4096(%rdx), %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xb8,0x72,0x80]
+          vfmadd231ps -4096(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -4128(%rdx), %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xb8,0xb2,0xe0,0xef,0xff,0xff]
+          vfmadd231ps -4128(%rdx), %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 508(%rdx){1to8}, %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x72,0x7f]
+          vfmadd231ps 508(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps 512(%rdx){1to8}, %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xb8,0xb2,0x00,0x02,0x00,0x00]
+          vfmadd231ps 512(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -512(%rdx){1to8}, %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xb8,0x72,0x80]
+          vfmadd231ps -512(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231ps -516(%rdx){1to8}, %ymm26, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xb8,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmadd231ps -516(%rdx){1to8}, %ymm26, %ymm22
+
+// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xdd,0x00,0xb8,0xe8]
+          vfmadd231pd %xmm24, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7}
+// CHECK:  encoding: [0x62,0x02,0xdd,0x07,0xb8,0xe8]
+          vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7}
+
+// CHECK: vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0xdd,0x87,0xb8,0xe8]
+          vfmadd231pd %xmm24, %xmm20, %xmm29 {%k7} {z}
+
+// CHECK: vfmadd231pd (%rcx), %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xb8,0x29]
+          vfmadd231pd (%rcx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 291(%rax,%r14,8), %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x22,0xdd,0x00,0xb8,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231pd 291(%rax,%r14,8), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd (%rcx){1to2}, %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xb8,0x29]
+          vfmadd231pd (%rcx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 2032(%rdx), %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xb8,0x6a,0x7f]
+          vfmadd231pd 2032(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 2048(%rdx), %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xb8,0xaa,0x00,0x08,0x00,0x00]
+          vfmadd231pd 2048(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -2048(%rdx), %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xb8,0x6a,0x80]
+          vfmadd231pd -2048(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -2064(%rdx), %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xb8,0xaa,0xf0,0xf7,0xff,0xff]
+          vfmadd231pd -2064(%rdx), %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 1016(%rdx){1to2}, %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xb8,0x6a,0x7f]
+          vfmadd231pd 1016(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd 1024(%rdx){1to2}, %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xb8,0xaa,0x00,0x04,0x00,0x00]
+          vfmadd231pd 1024(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -1024(%rdx){1to2}, %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xb8,0x6a,0x80]
+          vfmadd231pd -1024(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd -1032(%rdx){1to2}, %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xb8,0xaa,0xf8,0xfb,0xff,0xff]
+          vfmadd231pd -1032(%rdx){1to2}, %xmm20, %xmm29
+
+// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x02,0xdd,0x20,0xb8,0xc2]
+          vfmadd231pd %ymm26, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6}
+// CHECK:  encoding: [0x62,0x02,0xdd,0x26,0xb8,0xc2]
+          vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6}
+
+// CHECK: vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0xdd,0xa6,0xb8,0xc2]
+          vfmadd231pd %ymm26, %ymm20, %ymm24 {%k6} {z}
+
+// CHECK: vfmadd231pd (%rcx), %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xb8,0x01]
+          vfmadd231pd (%rcx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 291(%rax,%r14,8), %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x22,0xdd,0x20,0xb8,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmadd231pd 291(%rax,%r14,8), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd (%rcx){1to4}, %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xb8,0x01]
+          vfmadd231pd (%rcx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 4064(%rdx), %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xb8,0x42,0x7f]
+          vfmadd231pd 4064(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 4096(%rdx), %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xb8,0x82,0x00,0x10,0x00,0x00]
+          vfmadd231pd 4096(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -4096(%rdx), %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xb8,0x42,0x80]
+          vfmadd231pd -4096(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -4128(%rdx), %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xb8,0x82,0xe0,0xef,0xff,0xff]
+          vfmadd231pd -4128(%rdx), %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 1016(%rdx){1to4}, %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xb8,0x42,0x7f]
+          vfmadd231pd 1016(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd 1024(%rdx){1to4}, %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xb8,0x82,0x00,0x04,0x00,0x00]
+          vfmadd231pd 1024(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -1024(%rdx){1to4}, %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xb8,0x42,0x80]
+          vfmadd231pd -1024(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmadd231pd -1032(%rdx){1to4}, %ymm20, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xb8,0x82,0xf8,0xfb,0xff,0xff]
+          vfmadd231pd -1032(%rdx){1to4}, %ymm20, %ymm24
+
+// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x00,0x9a,0xcd]
+          vfmsub132ps %xmm21, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x01,0x9a,0xcd]
+          vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1}
+
+// CHECK: vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x81,0x9a,0xcd]
+          vfmsub132ps %xmm21, %xmm18, %xmm17 {%k1} {z}
+
+// CHECK: vfmsub132ps (%rcx), %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x09]
+          vfmsub132ps (%rcx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 291(%rax,%r14,8), %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x00,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132ps 291(%rax,%r14,8), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps (%rcx){1to4}, %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x09]
+          vfmsub132ps (%rcx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 2032(%rdx), %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x4a,0x7f]
+          vfmsub132ps 2032(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 2048(%rdx), %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x8a,0x00,0x08,0x00,0x00]
+          vfmsub132ps 2048(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -2048(%rdx), %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x4a,0x80]
+          vfmsub132ps -2048(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -2064(%rdx), %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x00,0x9a,0x8a,0xf0,0xf7,0xff,0xff]
+          vfmsub132ps -2064(%rdx), %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 508(%rdx){1to4}, %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x4a,0x7f]
+          vfmsub132ps 508(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps 512(%rdx){1to4}, %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x8a,0x00,0x02,0x00,0x00]
+          vfmsub132ps 512(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -512(%rdx){1to4}, %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x4a,0x80]
+          vfmsub132ps -512(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps -516(%rdx){1to4}, %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x10,0x9a,0x8a,0xfc,0xfd,0xff,0xff]
+          vfmsub132ps -516(%rdx){1to4}, %xmm18, %xmm17
+
+// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x1d,0x20,0x9a,0xcf]
+          vfmsub132ps %ymm23, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5}
+// CHECK:  encoding: [0x62,0x22,0x1d,0x25,0x9a,0xcf]
+          vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5}
+
+// CHECK: vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} {z}
+// CHECK:  encoding: [0x62,0x22,0x1d,0xa5,0x9a,0xcf]
+          vfmsub132ps %ymm23, %ymm28, %ymm25 {%k5} {z}
+
+// CHECK: vfmsub132ps (%rcx), %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x20,0x9a,0x09]
+          vfmsub132ps (%rcx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 291(%rax,%r14,8), %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x1d,0x20,0x9a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132ps 291(%rax,%r14,8), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps (%rcx){1to8}, %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x30,0x9a,0x09]
+          vfmsub132ps (%rcx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 4064(%rdx), %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x20,0x9a,0x4a,0x7f]
+          vfmsub132ps 4064(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 4096(%rdx), %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x20,0x9a,0x8a,0x00,0x10,0x00,0x00]
+          vfmsub132ps 4096(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -4096(%rdx), %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x20,0x9a,0x4a,0x80]
+          vfmsub132ps -4096(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -4128(%rdx), %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x20,0x9a,0x8a,0xe0,0xef,0xff,0xff]
+          vfmsub132ps -4128(%rdx), %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 508(%rdx){1to8}, %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x30,0x9a,0x4a,0x7f]
+          vfmsub132ps 508(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps 512(%rdx){1to8}, %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x30,0x9a,0x8a,0x00,0x02,0x00,0x00]
+          vfmsub132ps 512(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -512(%rdx){1to8}, %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x30,0x9a,0x4a,0x80]
+          vfmsub132ps -512(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132ps -516(%rdx){1to8}, %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x30,0x9a,0x8a,0xfc,0xfd,0xff,0xff]
+          vfmsub132ps -516(%rdx){1to8}, %ymm28, %ymm25
+
+// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x00,0x9a,0xd4]
+          vfmsub132pd %xmm20, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x01,0x9a,0xd4]
+          vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1}
+
+// CHECK: vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x81,0x9a,0xd4]
+          vfmsub132pd %xmm20, %xmm21, %xmm18 {%k1} {z}
+
+// CHECK: vfmsub132pd (%rcx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x11]
+          vfmsub132pd (%rcx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 291(%rax,%r14,8), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x00,0x9a,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132pd 291(%rax,%r14,8), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd (%rcx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x11]
+          vfmsub132pd (%rcx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 2032(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x52,0x7f]
+          vfmsub132pd 2032(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 2048(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x92,0x00,0x08,0x00,0x00]
+          vfmsub132pd 2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -2048(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x52,0x80]
+          vfmsub132pd -2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -2064(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0x9a,0x92,0xf0,0xf7,0xff,0xff]
+          vfmsub132pd -2064(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 1016(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x52,0x7f]
+          vfmsub132pd 1016(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd 1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x92,0x00,0x04,0x00,0x00]
+          vfmsub132pd 1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x52,0x80]
+          vfmsub132pd -1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd -1032(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0x9a,0x92,0xf8,0xfb,0xff,0xff]
+          vfmsub132pd -1032(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x20,0x9a,0xf1]
+          vfmsub132pd %ymm17, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5}
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x25,0x9a,0xf1]
+          vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5}
+
+// CHECK: vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa2,0x9d,0xa5,0x9a,0xf1]
+          vfmsub132pd %ymm17, %ymm28, %ymm22 {%k5} {z}
+
+// CHECK: vfmsub132pd (%rcx), %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x31]
+          vfmsub132pd (%rcx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 291(%rax,%r14,8), %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x20,0x9a,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub132pd 291(%rax,%r14,8), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd (%rcx){1to4}, %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x31]
+          vfmsub132pd (%rcx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 4064(%rdx), %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x72,0x7f]
+          vfmsub132pd 4064(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 4096(%rdx), %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9a,0xb2,0x00,0x10,0x00,0x00]
+          vfmsub132pd 4096(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -4096(%rdx), %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9a,0x72,0x80]
+          vfmsub132pd -4096(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -4128(%rdx), %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9a,0xb2,0xe0,0xef,0xff,0xff]
+          vfmsub132pd -4128(%rdx), %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x72,0x7f]
+          vfmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9a,0xb2,0x00,0x04,0x00,0x00]
+          vfmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9a,0x72,0x80]
+          vfmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9a,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm22
+
+// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0x82,0x25,0x00,0xaa,0xf4]
+          vfmsub213ps %xmm28, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x25,0x02,0xaa,0xf4]
+          vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2}
+
+// CHECK: vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x25,0x82,0xaa,0xf4]
+          vfmsub213ps %xmm28, %xmm27, %xmm22 {%k2} {z}
+
+// CHECK: vfmsub213ps (%rcx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xaa,0x31]
+          vfmsub213ps (%rcx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 291(%rax,%r14,8), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xa2,0x25,0x00,0xaa,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213ps 291(%rax,%r14,8), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps (%rcx){1to4}, %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xaa,0x31]
+          vfmsub213ps (%rcx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 2032(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xaa,0x72,0x7f]
+          vfmsub213ps 2032(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 2048(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xaa,0xb2,0x00,0x08,0x00,0x00]
+          vfmsub213ps 2048(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -2048(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xaa,0x72,0x80]
+          vfmsub213ps -2048(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -2064(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xaa,0xb2,0xf0,0xf7,0xff,0xff]
+          vfmsub213ps -2064(%rdx), %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 508(%rdx){1to4}, %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xaa,0x72,0x7f]
+          vfmsub213ps 508(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps 512(%rdx){1to4}, %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xaa,0xb2,0x00,0x02,0x00,0x00]
+          vfmsub213ps 512(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -512(%rdx){1to4}, %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xaa,0x72,0x80]
+          vfmsub213ps -512(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps -516(%rdx){1to4}, %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xaa,0xb2,0xfc,0xfd,0xff,0xff]
+          vfmsub213ps -516(%rdx){1to4}, %xmm27, %xmm22
+
+// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x22,0x15,0x20,0xaa,0xe6]
+          vfmsub213ps %ymm22, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x15,0x21,0xaa,0xe6]
+          vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1}
+
+// CHECK: vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} {z}
+// CHECK:  encoding: [0x62,0x22,0x15,0xa1,0xaa,0xe6]
+          vfmsub213ps %ymm22, %ymm29, %ymm28 {%k1} {z}
+
+// CHECK: vfmsub213ps (%rcx), %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0xaa,0x21]
+          vfmsub213ps (%rcx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 291(%rax,%r14,8), %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x22,0x15,0x20,0xaa,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213ps 291(%rax,%r14,8), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps (%rcx){1to8}, %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0xaa,0x21]
+          vfmsub213ps (%rcx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 4064(%rdx), %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0xaa,0x62,0x7f]
+          vfmsub213ps 4064(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 4096(%rdx), %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0xaa,0xa2,0x00,0x10,0x00,0x00]
+          vfmsub213ps 4096(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -4096(%rdx), %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0xaa,0x62,0x80]
+          vfmsub213ps -4096(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -4128(%rdx), %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0xaa,0xa2,0xe0,0xef,0xff,0xff]
+          vfmsub213ps -4128(%rdx), %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 508(%rdx){1to8}, %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0xaa,0x62,0x7f]
+          vfmsub213ps 508(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps 512(%rdx){1to8}, %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0xaa,0xa2,0x00,0x02,0x00,0x00]
+          vfmsub213ps 512(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -512(%rdx){1to8}, %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0xaa,0x62,0x80]
+          vfmsub213ps -512(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213ps -516(%rdx){1to8}, %ymm29, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0xaa,0xa2,0xfc,0xfd,0xff,0xff]
+          vfmsub213ps -516(%rdx){1to8}, %ymm29, %ymm28
+
+// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x00,0xaa,0xdc]
+          vfmsub213pd %xmm20, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x01,0xaa,0xdc]
+          vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1}
+
+// CHECK: vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x81,0xaa,0xdc]
+          vfmsub213pd %xmm20, %xmm19, %xmm19 {%k1} {z}
+
+// CHECK: vfmsub213pd (%rcx), %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x19]
+          vfmsub213pd (%rcx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 291(%rax,%r14,8), %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x00,0xaa,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213pd 291(%rax,%r14,8), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd (%rcx){1to2}, %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x19]
+          vfmsub213pd (%rcx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 2032(%rdx), %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x5a,0x7f]
+          vfmsub213pd 2032(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 2048(%rdx), %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x9a,0x00,0x08,0x00,0x00]
+          vfmsub213pd 2048(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -2048(%rdx), %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x5a,0x80]
+          vfmsub213pd -2048(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -2064(%rdx), %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xaa,0x9a,0xf0,0xf7,0xff,0xff]
+          vfmsub213pd -2064(%rdx), %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 1016(%rdx){1to2}, %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x5a,0x7f]
+          vfmsub213pd 1016(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd 1024(%rdx){1to2}, %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x9a,0x00,0x04,0x00,0x00]
+          vfmsub213pd 1024(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -1024(%rdx){1to2}, %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x5a,0x80]
+          vfmsub213pd -1024(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd -1032(%rdx){1to2}, %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xaa,0x9a,0xf8,0xfb,0xff,0xff]
+          vfmsub213pd -1032(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x02,0xb5,0x20,0xaa,0xdc]
+          vfmsub213pd %ymm28, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4}
+// CHECK:  encoding: [0x62,0x02,0xb5,0x24,0xaa,0xdc]
+          vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4}
+
+// CHECK: vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} {z}
+// CHECK:  encoding: [0x62,0x02,0xb5,0xa4,0xaa,0xdc]
+          vfmsub213pd %ymm28, %ymm25, %ymm27 {%k4} {z}
+
+// CHECK: vfmsub213pd (%rcx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xaa,0x19]
+          vfmsub213pd (%rcx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 291(%rax,%r14,8), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x22,0xb5,0x20,0xaa,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub213pd 291(%rax,%r14,8), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd (%rcx){1to4}, %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xaa,0x19]
+          vfmsub213pd (%rcx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 4064(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xaa,0x5a,0x7f]
+          vfmsub213pd 4064(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 4096(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xaa,0x9a,0x00,0x10,0x00,0x00]
+          vfmsub213pd 4096(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -4096(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xaa,0x5a,0x80]
+          vfmsub213pd -4096(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -4128(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xaa,0x9a,0xe0,0xef,0xff,0xff]
+          vfmsub213pd -4128(%rdx), %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 1016(%rdx){1to4}, %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xaa,0x5a,0x7f]
+          vfmsub213pd 1016(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd 1024(%rdx){1to4}, %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xaa,0x9a,0x00,0x04,0x00,0x00]
+          vfmsub213pd 1024(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -1024(%rdx){1to4}, %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xaa,0x5a,0x80]
+          vfmsub213pd -1024(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub213pd -1032(%rdx){1to4}, %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xaa,0x9a,0xf8,0xfb,0xff,0xff]
+          vfmsub213pd -1032(%rdx){1to4}, %ymm25, %ymm27
+
+// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x02,0x65,0x00,0xba,0xe1]
+          vfmsub231ps %xmm25, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x65,0x01,0xba,0xe1]
+          vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1}
+
+// CHECK: vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} {z}
+// CHECK:  encoding: [0x62,0x02,0x65,0x81,0xba,0xe1]
+          vfmsub231ps %xmm25, %xmm19, %xmm28 {%k1} {z}
+
+// CHECK: vfmsub231ps (%rcx), %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0xba,0x21]
+          vfmsub231ps (%rcx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 291(%rax,%r14,8), %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x22,0x65,0x00,0xba,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231ps 291(%rax,%r14,8), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps (%rcx){1to4}, %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0xba,0x21]
+          vfmsub231ps (%rcx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 2032(%rdx), %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0xba,0x62,0x7f]
+          vfmsub231ps 2032(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 2048(%rdx), %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0xba,0xa2,0x00,0x08,0x00,0x00]
+          vfmsub231ps 2048(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -2048(%rdx), %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0xba,0x62,0x80]
+          vfmsub231ps -2048(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -2064(%rdx), %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0xba,0xa2,0xf0,0xf7,0xff,0xff]
+          vfmsub231ps -2064(%rdx), %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 508(%rdx){1to4}, %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0xba,0x62,0x7f]
+          vfmsub231ps 508(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps 512(%rdx){1to4}, %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0xba,0xa2,0x00,0x02,0x00,0x00]
+          vfmsub231ps 512(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -512(%rdx){1to4}, %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0xba,0x62,0x80]
+          vfmsub231ps -512(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps -516(%rdx){1to4}, %xmm19, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0xba,0xa2,0xfc,0xfd,0xff,0xff]
+          vfmsub231ps -516(%rdx){1to4}, %xmm19, %xmm28
+
+// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0x82,0x2d,0x20,0xba,0xfa]
+          vfmsub231ps %ymm26, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x2d,0x21,0xba,0xfa]
+          vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1}
+
+// CHECK: vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} {z}
+// CHECK:  encoding: [0x62,0x82,0x2d,0xa1,0xba,0xfa]
+          vfmsub231ps %ymm26, %ymm26, %ymm23 {%k1} {z}
+
+// CHECK: vfmsub231ps (%rcx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xba,0x39]
+          vfmsub231ps (%rcx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 291(%rax,%r14,8), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x20,0xba,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231ps 291(%rax,%r14,8), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps (%rcx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xba,0x39]
+          vfmsub231ps (%rcx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 4064(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xba,0x7a,0x7f]
+          vfmsub231ps 4064(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 4096(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xba,0xba,0x00,0x10,0x00,0x00]
+          vfmsub231ps 4096(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -4096(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xba,0x7a,0x80]
+          vfmsub231ps -4096(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -4128(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0xba,0xba,0xe0,0xef,0xff,0xff]
+          vfmsub231ps -4128(%rdx), %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 508(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xba,0x7a,0x7f]
+          vfmsub231ps 508(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps 512(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xba,0xba,0x00,0x02,0x00,0x00]
+          vfmsub231ps 512(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -512(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xba,0x7a,0x80]
+          vfmsub231ps -512(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231ps -516(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0xba,0xba,0xfc,0xfd,0xff,0xff]
+          vfmsub231ps -516(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x22,0xdd,0x00,0xba,0xe7]
+          vfmsub231pd %xmm23, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4}
+// CHECK:  encoding: [0x62,0x22,0xdd,0x04,0xba,0xe7]
+          vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4}
+
+// CHECK: vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0xdd,0x84,0xba,0xe7]
+          vfmsub231pd %xmm23, %xmm20, %xmm28 {%k4} {z}
+
+// CHECK: vfmsub231pd (%rcx), %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xba,0x21]
+          vfmsub231pd (%rcx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 291(%rax,%r14,8), %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x22,0xdd,0x00,0xba,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231pd 291(%rax,%r14,8), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd (%rcx){1to2}, %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xba,0x21]
+          vfmsub231pd (%rcx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 2032(%rdx), %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xba,0x62,0x7f]
+          vfmsub231pd 2032(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 2048(%rdx), %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xba,0xa2,0x00,0x08,0x00,0x00]
+          vfmsub231pd 2048(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -2048(%rdx), %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xba,0x62,0x80]
+          vfmsub231pd -2048(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -2064(%rdx), %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0xba,0xa2,0xf0,0xf7,0xff,0xff]
+          vfmsub231pd -2064(%rdx), %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 1016(%rdx){1to2}, %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xba,0x62,0x7f]
+          vfmsub231pd 1016(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd 1024(%rdx){1to2}, %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xba,0xa2,0x00,0x04,0x00,0x00]
+          vfmsub231pd 1024(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -1024(%rdx){1to2}, %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xba,0x62,0x80]
+          vfmsub231pd -1024(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd -1032(%rdx){1to2}, %xmm20, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0xba,0xa2,0xf8,0xfb,0xff,0xff]
+          vfmsub231pd -1032(%rdx){1to2}, %xmm20, %xmm28
+
+// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xa2,0xed,0x20,0xba,0xce]
+          vfmsub231pd %ymm22, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0xed,0x22,0xba,0xce]
+          vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2}
+
+// CHECK: vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0xed,0xa2,0xba,0xce]
+          vfmsub231pd %ymm22, %ymm18, %ymm17 {%k2} {z}
+
+// CHECK: vfmsub231pd (%rcx), %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xba,0x09]
+          vfmsub231pd (%rcx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 291(%rax,%r14,8), %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xa2,0xed,0x20,0xba,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsub231pd 291(%rax,%r14,8), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd (%rcx){1to4}, %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xba,0x09]
+          vfmsub231pd (%rcx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 4064(%rdx), %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xba,0x4a,0x7f]
+          vfmsub231pd 4064(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 4096(%rdx), %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xba,0x8a,0x00,0x10,0x00,0x00]
+          vfmsub231pd 4096(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -4096(%rdx), %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xba,0x4a,0x80]
+          vfmsub231pd -4096(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -4128(%rdx), %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xba,0x8a,0xe0,0xef,0xff,0xff]
+          vfmsub231pd -4128(%rdx), %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xba,0x4a,0x7f]
+          vfmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xba,0x8a,0x00,0x04,0x00,0x00]
+          vfmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xba,0x4a,0x80]
+          vfmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xba,0x8a,0xf8,0xfb,0xff,0xff]
+          vfmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm17
+
+// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x25,0x00,0x96,0xc2]
+          vfmaddsub132ps %xmm18, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x25,0x02,0x96,0xc2]
+          vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2}
+
+// CHECK: vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x25,0x82,0x96,0xc2]
+          vfmaddsub132ps %xmm18, %xmm27, %xmm24 {%k2} {z}
+
+// CHECK: vfmaddsub132ps (%rcx), %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x00,0x96,0x01]
+          vfmaddsub132ps (%rcx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x25,0x00,0x96,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub132ps 291(%rax,%r14,8), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps (%rcx){1to4}, %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x10,0x96,0x01]
+          vfmaddsub132ps (%rcx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 2032(%rdx), %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x00,0x96,0x42,0x7f]
+          vfmaddsub132ps 2032(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 2048(%rdx), %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x00,0x96,0x82,0x00,0x08,0x00,0x00]
+          vfmaddsub132ps 2048(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -2048(%rdx), %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x00,0x96,0x42,0x80]
+          vfmaddsub132ps -2048(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -2064(%rdx), %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x00,0x96,0x82,0xf0,0xf7,0xff,0xff]
+          vfmaddsub132ps -2064(%rdx), %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 508(%rdx){1to4}, %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x10,0x96,0x42,0x7f]
+          vfmaddsub132ps 508(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps 512(%rdx){1to4}, %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x10,0x96,0x82,0x00,0x02,0x00,0x00]
+          vfmaddsub132ps 512(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -512(%rdx){1to4}, %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x10,0x96,0x42,0x80]
+          vfmaddsub132ps -512(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps -516(%rdx){1to4}, %xmm27, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x25,0x10,0x96,0x82,0xfc,0xfd,0xff,0xff]
+          vfmaddsub132ps -516(%rdx){1to4}, %xmm27, %xmm24
+
+// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0x82,0x5d,0x20,0x96,0xe8]
+          vfmaddsub132ps %ymm24, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5}
+// CHECK:  encoding: [0x62,0x82,0x5d,0x25,0x96,0xe8]
+          vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5}
+
+// CHECK: vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} {z}
+// CHECK:  encoding: [0x62,0x82,0x5d,0xa5,0x96,0xe8]
+          vfmaddsub132ps %ymm24, %ymm20, %ymm21 {%k5} {z}
+
+// CHECK: vfmaddsub132ps (%rcx), %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0x96,0x29]
+          vfmaddsub132ps (%rcx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 291(%rax,%r14,8), %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x20,0x96,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub132ps 291(%rax,%r14,8), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps (%rcx){1to8}, %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0x96,0x29]
+          vfmaddsub132ps (%rcx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 4064(%rdx), %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0x96,0x6a,0x7f]
+          vfmaddsub132ps 4064(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 4096(%rdx), %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0x96,0xaa,0x00,0x10,0x00,0x00]
+          vfmaddsub132ps 4096(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -4096(%rdx), %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0x96,0x6a,0x80]
+          vfmaddsub132ps -4096(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -4128(%rdx), %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0x96,0xaa,0xe0,0xef,0xff,0xff]
+          vfmaddsub132ps -4128(%rdx), %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 508(%rdx){1to8}, %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0x96,0x6a,0x7f]
+          vfmaddsub132ps 508(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps 512(%rdx){1to8}, %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0x96,0xaa,0x00,0x02,0x00,0x00]
+          vfmaddsub132ps 512(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -512(%rdx){1to8}, %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0x96,0x6a,0x80]
+          vfmaddsub132ps -512(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132ps -516(%rdx){1to8}, %ymm20, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0x96,0xaa,0xfc,0xfd,0xff,0xff]
+          vfmaddsub132ps -516(%rdx){1to8}, %ymm20, %ymm21
+
+// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xa5,0x00,0x96,0xd4]
+          vfmaddsub132pd %xmm20, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6}
+// CHECK:  encoding: [0x62,0x22,0xa5,0x06,0x96,0xd4]
+          vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6}
+
+// CHECK: vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0xa5,0x86,0x96,0xd4]
+          vfmaddsub132pd %xmm20, %xmm27, %xmm26 {%k6} {z}
+
+// CHECK: vfmaddsub132pd (%rcx), %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x96,0x11]
+          vfmaddsub132pd (%rcx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xa5,0x00,0x96,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub132pd 291(%rax,%r14,8), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd (%rcx){1to2}, %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x96,0x11]
+          vfmaddsub132pd (%rcx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 2032(%rdx), %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x96,0x52,0x7f]
+          vfmaddsub132pd 2032(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 2048(%rdx), %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x96,0x92,0x00,0x08,0x00,0x00]
+          vfmaddsub132pd 2048(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -2048(%rdx), %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x96,0x52,0x80]
+          vfmaddsub132pd -2048(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -2064(%rdx), %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x96,0x92,0xf0,0xf7,0xff,0xff]
+          vfmaddsub132pd -2064(%rdx), %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 1016(%rdx){1to2}, %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x96,0x52,0x7f]
+          vfmaddsub132pd 1016(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd 1024(%rdx){1to2}, %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x96,0x92,0x00,0x04,0x00,0x00]
+          vfmaddsub132pd 1024(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -1024(%rdx){1to2}, %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x96,0x52,0x80]
+          vfmaddsub132pd -1024(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd -1032(%rdx){1to2}, %xmm27, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x96,0x92,0xf8,0xfb,0xff,0xff]
+          vfmaddsub132pd -1032(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x02,0xed,0x20,0x96,0xf4]
+          vfmaddsub132pd %ymm28, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2}
+// CHECK:  encoding: [0x62,0x02,0xed,0x22,0x96,0xf4]
+          vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2}
+
+// CHECK: vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} {z}
+// CHECK:  encoding: [0x62,0x02,0xed,0xa2,0x96,0xf4]
+          vfmaddsub132pd %ymm28, %ymm18, %ymm30 {%k2} {z}
+
+// CHECK: vfmaddsub132pd (%rcx), %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x20,0x96,0x31]
+          vfmaddsub132pd (%rcx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 291(%rax,%r14,8), %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x22,0xed,0x20,0x96,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub132pd 291(%rax,%r14,8), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd (%rcx){1to4}, %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x30,0x96,0x31]
+          vfmaddsub132pd (%rcx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 4064(%rdx), %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x20,0x96,0x72,0x7f]
+          vfmaddsub132pd 4064(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 4096(%rdx), %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x20,0x96,0xb2,0x00,0x10,0x00,0x00]
+          vfmaddsub132pd 4096(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -4096(%rdx), %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x20,0x96,0x72,0x80]
+          vfmaddsub132pd -4096(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -4128(%rdx), %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x20,0x96,0xb2,0xe0,0xef,0xff,0xff]
+          vfmaddsub132pd -4128(%rdx), %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 1016(%rdx){1to4}, %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x30,0x96,0x72,0x7f]
+          vfmaddsub132pd 1016(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd 1024(%rdx){1to4}, %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x30,0x96,0xb2,0x00,0x04,0x00,0x00]
+          vfmaddsub132pd 1024(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -1024(%rdx){1to4}, %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x30,0x96,0x72,0x80]
+          vfmaddsub132pd -1024(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub132pd -1032(%rdx){1to4}, %ymm18, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xed,0x30,0x96,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmaddsub132pd -1032(%rdx){1to4}, %ymm18, %ymm30
+
+// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xa2,0x15,0x00,0xa6,0xe1]
+          vfmaddsub213ps %xmm17, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x15,0x01,0xa6,0xe1]
+          vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1}
+
+// CHECK: vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x15,0x81,0xa6,0xe1]
+          vfmaddsub213ps %xmm17, %xmm29, %xmm20 {%k1} {z}
+
+// CHECK: vfmaddsub213ps (%rcx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xa6,0x21]
+          vfmaddsub213ps (%rcx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xa2,0x15,0x00,0xa6,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub213ps 291(%rax,%r14,8), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps (%rcx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xa6,0x21]
+          vfmaddsub213ps (%rcx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 2032(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xa6,0x62,0x7f]
+          vfmaddsub213ps 2032(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 2048(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xa6,0xa2,0x00,0x08,0x00,0x00]
+          vfmaddsub213ps 2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -2048(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xa6,0x62,0x80]
+          vfmaddsub213ps -2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -2064(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xa6,0xa2,0xf0,0xf7,0xff,0xff]
+          vfmaddsub213ps -2064(%rdx), %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 508(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xa6,0x62,0x7f]
+          vfmaddsub213ps 508(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps 512(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xa6,0xa2,0x00,0x02,0x00,0x00]
+          vfmaddsub213ps 512(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -512(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xa6,0x62,0x80]
+          vfmaddsub213ps -512(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps -516(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xa6,0xa2,0xfc,0xfd,0xff,0xff]
+          vfmaddsub213ps -516(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x25,0x20,0xa6,0xcf]
+          vfmaddsub213ps %ymm23, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x25,0x24,0xa6,0xcf]
+          vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4}
+
+// CHECK: vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x25,0xa4,0xa6,0xcf]
+          vfmaddsub213ps %ymm23, %ymm27, %ymm25 {%k4} {z}
+
+// CHECK: vfmaddsub213ps (%rcx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x20,0xa6,0x09]
+          vfmaddsub213ps (%rcx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 291(%rax,%r14,8), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x25,0x20,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub213ps 291(%rax,%r14,8), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps (%rcx){1to8}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x30,0xa6,0x09]
+          vfmaddsub213ps (%rcx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 4064(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x20,0xa6,0x4a,0x7f]
+          vfmaddsub213ps 4064(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 4096(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x20,0xa6,0x8a,0x00,0x10,0x00,0x00]
+          vfmaddsub213ps 4096(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -4096(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x20,0xa6,0x4a,0x80]
+          vfmaddsub213ps -4096(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -4128(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x20,0xa6,0x8a,0xe0,0xef,0xff,0xff]
+          vfmaddsub213ps -4128(%rdx), %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 508(%rdx){1to8}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x30,0xa6,0x4a,0x7f]
+          vfmaddsub213ps 508(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps 512(%rdx){1to8}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x30,0xa6,0x8a,0x00,0x02,0x00,0x00]
+          vfmaddsub213ps 512(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -512(%rdx){1to8}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x30,0xa6,0x4a,0x80]
+          vfmaddsub213ps -512(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213ps -516(%rdx){1to8}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x25,0x30,0xa6,0x8a,0xfc,0xfd,0xff,0xff]
+          vfmaddsub213ps -516(%rdx){1to8}, %ymm27, %ymm25
+
+// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x95,0x00,0xa6,0xcb]
+          vfmaddsub213pd %xmm19, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x95,0x07,0xa6,0xcb]
+          vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7}
+
+// CHECK: vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x95,0x87,0xa6,0xcb]
+          vfmaddsub213pd %xmm19, %xmm29, %xmm25 {%k7} {z}
+
+// CHECK: vfmaddsub213pd (%rcx), %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x00,0xa6,0x09]
+          vfmaddsub213pd (%rcx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x95,0x00,0xa6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub213pd 291(%rax,%r14,8), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd (%rcx){1to2}, %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x10,0xa6,0x09]
+          vfmaddsub213pd (%rcx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 2032(%rdx), %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x00,0xa6,0x4a,0x7f]
+          vfmaddsub213pd 2032(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 2048(%rdx), %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x00,0xa6,0x8a,0x00,0x08,0x00,0x00]
+          vfmaddsub213pd 2048(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -2048(%rdx), %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x00,0xa6,0x4a,0x80]
+          vfmaddsub213pd -2048(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -2064(%rdx), %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x00,0xa6,0x8a,0xf0,0xf7,0xff,0xff]
+          vfmaddsub213pd -2064(%rdx), %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 1016(%rdx){1to2}, %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x10,0xa6,0x4a,0x7f]
+          vfmaddsub213pd 1016(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd 1024(%rdx){1to2}, %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x10,0xa6,0x8a,0x00,0x04,0x00,0x00]
+          vfmaddsub213pd 1024(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -1024(%rdx){1to2}, %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x10,0xa6,0x4a,0x80]
+          vfmaddsub213pd -1024(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd -1032(%rdx){1to2}, %xmm29, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x10,0xa6,0x8a,0xf8,0xfb,0xff,0xff]
+          vfmaddsub213pd -1032(%rdx){1to2}, %xmm29, %xmm25
+
+// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0xf5,0x20,0xa6,0xec]
+          vfmaddsub213pd %ymm20, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0xf5,0x24,0xa6,0xec]
+          vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4}
+
+// CHECK: vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0xf5,0xa4,0xa6,0xec]
+          vfmaddsub213pd %ymm20, %ymm17, %ymm21 {%k4} {z}
+
+// CHECK: vfmaddsub213pd (%rcx), %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x29]
+          vfmaddsub213pd (%rcx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 291(%rax,%r14,8), %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0xf5,0x20,0xa6,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub213pd 291(%rax,%r14,8), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd (%rcx){1to4}, %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x29]
+          vfmaddsub213pd (%rcx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 4064(%rdx), %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x6a,0x7f]
+          vfmaddsub213pd 4064(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 4096(%rdx), %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0xa6,0xaa,0x00,0x10,0x00,0x00]
+          vfmaddsub213pd 4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -4096(%rdx), %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0xa6,0x6a,0x80]
+          vfmaddsub213pd -4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -4128(%rdx), %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0xa6,0xaa,0xe0,0xef,0xff,0xff]
+          vfmaddsub213pd -4128(%rdx), %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 1016(%rdx){1to4}, %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x6a,0x7f]
+          vfmaddsub213pd 1016(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd 1024(%rdx){1to4}, %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0xa6,0xaa,0x00,0x04,0x00,0x00]
+          vfmaddsub213pd 1024(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -1024(%rdx){1to4}, %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0xa6,0x6a,0x80]
+          vfmaddsub213pd -1024(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub213pd -1032(%rdx){1to4}, %ymm17, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0xa6,0xaa,0xf8,0xfb,0xff,0xff]
+          vfmaddsub213pd -1032(%rdx){1to4}, %ymm17, %ymm21
+
+// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x15,0x00,0xb6,0xdc]
+          vfmaddsub231ps %xmm20, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0x15,0x06,0xb6,0xdc]
+          vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6}
+
+// CHECK: vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0x15,0x86,0xb6,0xdc]
+          vfmaddsub231ps %xmm20, %xmm29, %xmm19 {%k6} {z}
+
+// CHECK: vfmaddsub231ps (%rcx), %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xb6,0x19]
+          vfmaddsub231ps (%rcx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x15,0x00,0xb6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub231ps 291(%rax,%r14,8), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps (%rcx){1to4}, %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xb6,0x19]
+          vfmaddsub231ps (%rcx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 2032(%rdx), %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xb6,0x5a,0x7f]
+          vfmaddsub231ps 2032(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 2048(%rdx), %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xb6,0x9a,0x00,0x08,0x00,0x00]
+          vfmaddsub231ps 2048(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -2048(%rdx), %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xb6,0x5a,0x80]
+          vfmaddsub231ps -2048(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -2064(%rdx), %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0xb6,0x9a,0xf0,0xf7,0xff,0xff]
+          vfmaddsub231ps -2064(%rdx), %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 508(%rdx){1to4}, %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xb6,0x5a,0x7f]
+          vfmaddsub231ps 508(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps 512(%rdx){1to4}, %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xb6,0x9a,0x00,0x02,0x00,0x00]
+          vfmaddsub231ps 512(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -512(%rdx){1to4}, %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xb6,0x5a,0x80]
+          vfmaddsub231ps -512(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps -516(%rdx){1to4}, %xmm29, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0xb6,0x9a,0xfc,0xfd,0xff,0xff]
+          vfmaddsub231ps -516(%rdx){1to4}, %xmm29, %xmm19
+
+// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0xb6,0xd9]
+          vfmaddsub231ps %ymm17, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x21,0xb6,0xd9]
+          vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1}
+
+// CHECK: vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x3d,0xa1,0xb6,0xd9]
+          vfmaddsub231ps %ymm17, %ymm24, %ymm19 {%k1} {z}
+
+// CHECK: vfmaddsub231ps (%rcx), %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x19]
+          vfmaddsub231ps (%rcx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 291(%rax,%r14,8), %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0xb6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub231ps 291(%rax,%r14,8), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps (%rcx){1to8}, %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x19]
+          vfmaddsub231ps (%rcx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 4064(%rdx), %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x5a,0x7f]
+          vfmaddsub231ps 4064(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 4096(%rdx), %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x9a,0x00,0x10,0x00,0x00]
+          vfmaddsub231ps 4096(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -4096(%rdx), %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x5a,0x80]
+          vfmaddsub231ps -4096(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -4128(%rdx), %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0xb6,0x9a,0xe0,0xef,0xff,0xff]
+          vfmaddsub231ps -4128(%rdx), %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 508(%rdx){1to8}, %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x5a,0x7f]
+          vfmaddsub231ps 508(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps 512(%rdx){1to8}, %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x9a,0x00,0x02,0x00,0x00]
+          vfmaddsub231ps 512(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -512(%rdx){1to8}, %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x5a,0x80]
+          vfmaddsub231ps -512(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231ps -516(%rdx){1to8}, %ymm24, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0xb6,0x9a,0xfc,0xfd,0xff,0xff]
+          vfmaddsub231ps -516(%rdx){1to8}, %ymm24, %ymm19
+
+// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0x82,0xad,0x00,0xb6,0xfc]
+          vfmaddsub231pd %xmm28, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7}
+// CHECK:  encoding: [0x62,0x82,0xad,0x07,0xb6,0xfc]
+          vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7}
+
+// CHECK: vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0xad,0x87,0xb6,0xfc]
+          vfmaddsub231pd %xmm28, %xmm26, %xmm23 {%k7} {z}
+
+// CHECK: vfmaddsub231pd (%rcx), %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0xb6,0x39]
+          vfmaddsub231pd (%rcx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xa2,0xad,0x00,0xb6,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub231pd 291(%rax,%r14,8), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd (%rcx){1to2}, %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0xb6,0x39]
+          vfmaddsub231pd (%rcx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 2032(%rdx), %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0xb6,0x7a,0x7f]
+          vfmaddsub231pd 2032(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 2048(%rdx), %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0xb6,0xba,0x00,0x08,0x00,0x00]
+          vfmaddsub231pd 2048(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -2048(%rdx), %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0xb6,0x7a,0x80]
+          vfmaddsub231pd -2048(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -2064(%rdx), %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0xb6,0xba,0xf0,0xf7,0xff,0xff]
+          vfmaddsub231pd -2064(%rdx), %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 1016(%rdx){1to2}, %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0xb6,0x7a,0x7f]
+          vfmaddsub231pd 1016(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd 1024(%rdx){1to2}, %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0xb6,0xba,0x00,0x04,0x00,0x00]
+          vfmaddsub231pd 1024(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -1024(%rdx){1to2}, %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0xb6,0x7a,0x80]
+          vfmaddsub231pd -1024(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd -1032(%rdx){1to2}, %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0xb6,0xba,0xf8,0xfb,0xff,0xff]
+          vfmaddsub231pd -1032(%rdx){1to2}, %xmm26, %xmm23
+
+// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x02,0xb5,0x20,0xb6,0xf3]
+          vfmaddsub231pd %ymm27, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5}
+// CHECK:  encoding: [0x62,0x02,0xb5,0x25,0xb6,0xf3]
+          vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5}
+
+// CHECK: vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0xb5,0xa5,0xb6,0xf3]
+          vfmaddsub231pd %ymm27, %ymm25, %ymm30 {%k5} {z}
+
+// CHECK: vfmaddsub231pd (%rcx), %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xb6,0x31]
+          vfmaddsub231pd (%rcx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 291(%rax,%r14,8), %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x22,0xb5,0x20,0xb6,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmaddsub231pd 291(%rax,%r14,8), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd (%rcx){1to4}, %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xb6,0x31]
+          vfmaddsub231pd (%rcx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 4064(%rdx), %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xb6,0x72,0x7f]
+          vfmaddsub231pd 4064(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 4096(%rdx), %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xb6,0xb2,0x00,0x10,0x00,0x00]
+          vfmaddsub231pd 4096(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -4096(%rdx), %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xb6,0x72,0x80]
+          vfmaddsub231pd -4096(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -4128(%rdx), %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0xb6,0xb2,0xe0,0xef,0xff,0xff]
+          vfmaddsub231pd -4128(%rdx), %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 1016(%rdx){1to4}, %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xb6,0x72,0x7f]
+          vfmaddsub231pd 1016(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd 1024(%rdx){1to4}, %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xb6,0xb2,0x00,0x04,0x00,0x00]
+          vfmaddsub231pd 1024(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -1024(%rdx){1to4}, %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xb6,0x72,0x80]
+          vfmaddsub231pd -1024(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmaddsub231pd -1032(%rdx){1to4}, %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0xb6,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmaddsub231pd -1032(%rdx){1to4}, %ymm25, %ymm30
+
+// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x45,0x00,0x97,0xc4]
+          vfmsubadd132ps %xmm20, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5}
+// CHECK:  encoding: [0x62,0x22,0x45,0x05,0x97,0xc4]
+          vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5}
+
+// CHECK: vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} {z}
+// CHECK:  encoding: [0x62,0x22,0x45,0x85,0x97,0xc4]
+          vfmsubadd132ps %xmm20, %xmm23, %xmm24 {%k5} {z}
+
+// CHECK: vfmsubadd132ps (%rcx), %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x00,0x97,0x01]
+          vfmsubadd132ps (%rcx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x45,0x00,0x97,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd132ps 291(%rax,%r14,8), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps (%rcx){1to4}, %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x10,0x97,0x01]
+          vfmsubadd132ps (%rcx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 2032(%rdx), %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x00,0x97,0x42,0x7f]
+          vfmsubadd132ps 2032(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 2048(%rdx), %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x00,0x97,0x82,0x00,0x08,0x00,0x00]
+          vfmsubadd132ps 2048(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -2048(%rdx), %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x00,0x97,0x42,0x80]
+          vfmsubadd132ps -2048(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -2064(%rdx), %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x00,0x97,0x82,0xf0,0xf7,0xff,0xff]
+          vfmsubadd132ps -2064(%rdx), %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 508(%rdx){1to4}, %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x10,0x97,0x42,0x7f]
+          vfmsubadd132ps 508(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps 512(%rdx){1to4}, %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x10,0x97,0x82,0x00,0x02,0x00,0x00]
+          vfmsubadd132ps 512(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -512(%rdx){1to4}, %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x10,0x97,0x42,0x80]
+          vfmsubadd132ps -512(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps -516(%rdx){1to4}, %xmm23, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x45,0x10,0x97,0x82,0xfc,0xfd,0xff,0xff]
+          vfmsubadd132ps -516(%rdx){1to4}, %xmm23, %xmm24
+
+// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x1d,0x20,0x97,0xff]
+          vfmsubadd132ps %ymm23, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x1d,0x21,0x97,0xff]
+          vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1}
+
+// CHECK: vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x1d,0xa1,0x97,0xff]
+          vfmsubadd132ps %ymm23, %ymm28, %ymm23 {%k1} {z}
+
+// CHECK: vfmsubadd132ps (%rcx), %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x20,0x97,0x39]
+          vfmsubadd132ps (%rcx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 291(%rax,%r14,8), %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x1d,0x20,0x97,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd132ps 291(%rax,%r14,8), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps (%rcx){1to8}, %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x30,0x97,0x39]
+          vfmsubadd132ps (%rcx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 4064(%rdx), %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x20,0x97,0x7a,0x7f]
+          vfmsubadd132ps 4064(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 4096(%rdx), %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x20,0x97,0xba,0x00,0x10,0x00,0x00]
+          vfmsubadd132ps 4096(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -4096(%rdx), %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x20,0x97,0x7a,0x80]
+          vfmsubadd132ps -4096(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -4128(%rdx), %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x20,0x97,0xba,0xe0,0xef,0xff,0xff]
+          vfmsubadd132ps -4128(%rdx), %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 508(%rdx){1to8}, %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x30,0x97,0x7a,0x7f]
+          vfmsubadd132ps 508(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps 512(%rdx){1to8}, %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x30,0x97,0xba,0x00,0x02,0x00,0x00]
+          vfmsubadd132ps 512(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -512(%rdx){1to8}, %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x30,0x97,0x7a,0x80]
+          vfmsubadd132ps -512(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132ps -516(%rdx){1to8}, %ymm28, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x1d,0x30,0x97,0xba,0xfc,0xfd,0xff,0xff]
+          vfmsubadd132ps -516(%rdx){1to8}, %ymm28, %ymm23
+
+// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0x82,0xad,0x00,0x97,0xf0]
+          vfmsubadd132pd %xmm24, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3}
+// CHECK:  encoding: [0x62,0x82,0xad,0x03,0x97,0xf0]
+          vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3}
+
+// CHECK: vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0xad,0x83,0x97,0xf0]
+          vfmsubadd132pd %xmm24, %xmm26, %xmm22 {%k3} {z}
+
+// CHECK: vfmsubadd132pd (%rcx), %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0x97,0x31]
+          vfmsubadd132pd (%rcx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xa2,0xad,0x00,0x97,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd132pd 291(%rax,%r14,8), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd (%rcx){1to2}, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0x97,0x31]
+          vfmsubadd132pd (%rcx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 2032(%rdx), %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0x97,0x72,0x7f]
+          vfmsubadd132pd 2032(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 2048(%rdx), %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0x97,0xb2,0x00,0x08,0x00,0x00]
+          vfmsubadd132pd 2048(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -2048(%rdx), %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0x97,0x72,0x80]
+          vfmsubadd132pd -2048(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -2064(%rdx), %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x00,0x97,0xb2,0xf0,0xf7,0xff,0xff]
+          vfmsubadd132pd -2064(%rdx), %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 1016(%rdx){1to2}, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0x97,0x72,0x7f]
+          vfmsubadd132pd 1016(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd 1024(%rdx){1to2}, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0x97,0xb2,0x00,0x04,0x00,0x00]
+          vfmsubadd132pd 1024(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -1024(%rdx){1to2}, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0x97,0x72,0x80]
+          vfmsubadd132pd -1024(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd -1032(%rdx){1to2}, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xad,0x10,0x97,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmsubadd132pd -1032(%rdx){1to2}, %xmm26, %xmm22
+
+// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x22,0xf5,0x20,0x97,0xc5]
+          vfmsubadd132pd %ymm21, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7}
+// CHECK:  encoding: [0x62,0x22,0xf5,0x27,0x97,0xc5]
+          vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7}
+
+// CHECK: vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0xf5,0xa7,0x97,0xc5]
+          vfmsubadd132pd %ymm21, %ymm17, %ymm24 {%k7} {z}
+
+// CHECK: vfmsubadd132pd (%rcx), %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x97,0x01]
+          vfmsubadd132pd (%rcx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 291(%rax,%r14,8), %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x22,0xf5,0x20,0x97,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd132pd 291(%rax,%r14,8), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd (%rcx){1to4}, %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x97,0x01]
+          vfmsubadd132pd (%rcx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 4064(%rdx), %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x97,0x42,0x7f]
+          vfmsubadd132pd 4064(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 4096(%rdx), %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x97,0x82,0x00,0x10,0x00,0x00]
+          vfmsubadd132pd 4096(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -4096(%rdx), %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x97,0x42,0x80]
+          vfmsubadd132pd -4096(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -4128(%rdx), %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x97,0x82,0xe0,0xef,0xff,0xff]
+          vfmsubadd132pd -4128(%rdx), %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 1016(%rdx){1to4}, %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x97,0x42,0x7f]
+          vfmsubadd132pd 1016(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd 1024(%rdx){1to4}, %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x97,0x82,0x00,0x04,0x00,0x00]
+          vfmsubadd132pd 1024(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -1024(%rdx){1to4}, %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x97,0x42,0x80]
+          vfmsubadd132pd -1024(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd132pd -1032(%rdx){1to4}, %ymm17, %ymm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x97,0x82,0xf8,0xfb,0xff,0xff]
+          vfmsubadd132pd -1032(%rdx){1to4}, %ymm17, %ymm24
+
+// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x00,0xa7,0xd9]
+          vfmsubadd213ps %xmm17, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5}
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x05,0xa7,0xd9]
+          vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5}
+
+// CHECK: vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x85,0xa7,0xd9]
+          vfmsubadd213ps %xmm17, %xmm20, %xmm19 {%k5} {z}
+
+// CHECK: vfmsubadd213ps (%rcx), %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x19]
+          vfmsubadd213ps (%rcx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x00,0xa7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd213ps 291(%rax,%r14,8), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps (%rcx){1to4}, %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x19]
+          vfmsubadd213ps (%rcx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 2032(%rdx), %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x5a,0x7f]
+          vfmsubadd213ps 2032(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 2048(%rdx), %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x9a,0x00,0x08,0x00,0x00]
+          vfmsubadd213ps 2048(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -2048(%rdx), %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x5a,0x80]
+          vfmsubadd213ps -2048(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -2064(%rdx), %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x00,0xa7,0x9a,0xf0,0xf7,0xff,0xff]
+          vfmsubadd213ps -2064(%rdx), %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 508(%rdx){1to4}, %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x5a,0x7f]
+          vfmsubadd213ps 508(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps 512(%rdx){1to4}, %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x9a,0x00,0x02,0x00,0x00]
+          vfmsubadd213ps 512(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -512(%rdx){1to4}, %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x5a,0x80]
+          vfmsubadd213ps -512(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps -516(%rdx){1to4}, %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x10,0xa7,0x9a,0xfc,0xfd,0xff,0xff]
+          vfmsubadd213ps -516(%rdx){1to4}, %xmm20, %xmm19
+
+// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x22,0x75,0x20,0xa7,0xd7]
+          vfmsubadd213ps %ymm23, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x75,0x27,0xa7,0xd7]
+          vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7}
+
+// CHECK: vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x75,0xa7,0xa7,0xd7]
+          vfmsubadd213ps %ymm23, %ymm17, %ymm26 {%k7} {z}
+
+// CHECK: vfmsubadd213ps (%rcx), %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x20,0xa7,0x11]
+          vfmsubadd213ps (%rcx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 291(%rax,%r14,8), %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x22,0x75,0x20,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd213ps 291(%rax,%r14,8), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps (%rcx){1to8}, %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x30,0xa7,0x11]
+          vfmsubadd213ps (%rcx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 4064(%rdx), %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x20,0xa7,0x52,0x7f]
+          vfmsubadd213ps 4064(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 4096(%rdx), %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x20,0xa7,0x92,0x00,0x10,0x00,0x00]
+          vfmsubadd213ps 4096(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -4096(%rdx), %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x20,0xa7,0x52,0x80]
+          vfmsubadd213ps -4096(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -4128(%rdx), %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x20,0xa7,0x92,0xe0,0xef,0xff,0xff]
+          vfmsubadd213ps -4128(%rdx), %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 508(%rdx){1to8}, %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x30,0xa7,0x52,0x7f]
+          vfmsubadd213ps 508(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps 512(%rdx){1to8}, %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x30,0xa7,0x92,0x00,0x02,0x00,0x00]
+          vfmsubadd213ps 512(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -512(%rdx){1to8}, %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x30,0xa7,0x52,0x80]
+          vfmsubadd213ps -512(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213ps -516(%rdx){1to8}, %ymm17, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x75,0x30,0xa7,0x92,0xfc,0xfd,0xff,0xff]
+          vfmsubadd213ps -516(%rdx){1to8}, %ymm17, %ymm26
+
+// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0x82,0xd5,0x00,0xa7,0xd4]
+          vfmsubadd213pd %xmm28, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4}
+// CHECK:  encoding: [0x62,0x82,0xd5,0x04,0xa7,0xd4]
+          vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4}
+
+// CHECK: vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} {z}
+// CHECK:  encoding: [0x62,0x82,0xd5,0x84,0xa7,0xd4]
+          vfmsubadd213pd %xmm28, %xmm21, %xmm18 {%k4} {z}
+
+// CHECK: vfmsubadd213pd (%rcx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x11]
+          vfmsubadd213pd (%rcx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0xd5,0x00,0xa7,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd213pd 291(%rax,%r14,8), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd (%rcx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x11]
+          vfmsubadd213pd (%rcx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 2032(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x52,0x7f]
+          vfmsubadd213pd 2032(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 2048(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x92,0x00,0x08,0x00,0x00]
+          vfmsubadd213pd 2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -2048(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x52,0x80]
+          vfmsubadd213pd -2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -2064(%rdx), %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x00,0xa7,0x92,0xf0,0xf7,0xff,0xff]
+          vfmsubadd213pd -2064(%rdx), %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 1016(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x52,0x7f]
+          vfmsubadd213pd 1016(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd 1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x92,0x00,0x04,0x00,0x00]
+          vfmsubadd213pd 1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -1024(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x52,0x80]
+          vfmsubadd213pd -1024(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd -1032(%rdx){1to2}, %xmm21, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xd5,0x10,0xa7,0x92,0xf8,0xfb,0xff,0xff]
+          vfmsubadd213pd -1032(%rdx){1to2}, %xmm21, %xmm18
+
+// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x02,0xdd,0x20,0xa7,0xd9]
+          vfmsubadd213pd %ymm25, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7}
+// CHECK:  encoding: [0x62,0x02,0xdd,0x27,0xa7,0xd9]
+          vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7}
+
+// CHECK: vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0xdd,0xa7,0xa7,0xd9]
+          vfmsubadd213pd %ymm25, %ymm20, %ymm27 {%k7} {z}
+
+// CHECK: vfmsubadd213pd (%rcx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xa7,0x19]
+          vfmsubadd213pd (%rcx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 291(%rax,%r14,8), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x22,0xdd,0x20,0xa7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd213pd 291(%rax,%r14,8), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd (%rcx){1to4}, %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xa7,0x19]
+          vfmsubadd213pd (%rcx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 4064(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xa7,0x5a,0x7f]
+          vfmsubadd213pd 4064(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 4096(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xa7,0x9a,0x00,0x10,0x00,0x00]
+          vfmsubadd213pd 4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -4096(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xa7,0x5a,0x80]
+          vfmsubadd213pd -4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -4128(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0xa7,0x9a,0xe0,0xef,0xff,0xff]
+          vfmsubadd213pd -4128(%rdx), %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 1016(%rdx){1to4}, %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xa7,0x5a,0x7f]
+          vfmsubadd213pd 1016(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd 1024(%rdx){1to4}, %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xa7,0x9a,0x00,0x04,0x00,0x00]
+          vfmsubadd213pd 1024(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -1024(%rdx){1to4}, %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xa7,0x5a,0x80]
+          vfmsubadd213pd -1024(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd213pd -1032(%rdx){1to4}, %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0xa7,0x9a,0xf8,0xfb,0xff,0xff]
+          vfmsubadd213pd -1032(%rdx){1to4}, %ymm20, %ymm27
+
+// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x35,0x00,0xb7,0xef]
+          vfmsubadd231ps %xmm23, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0x35,0x04,0xb7,0xef]
+          vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4}
+
+// CHECK: vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0x35,0x84,0xb7,0xef]
+          vfmsubadd231ps %xmm23, %xmm25, %xmm21 {%k4} {z}
+
+// CHECK: vfmsubadd231ps (%rcx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0xb7,0x29]
+          vfmsubadd231ps (%rcx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x35,0x00,0xb7,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd231ps 291(%rax,%r14,8), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps (%rcx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0xb7,0x29]
+          vfmsubadd231ps (%rcx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 2032(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0xb7,0x6a,0x7f]
+          vfmsubadd231ps 2032(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 2048(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0xb7,0xaa,0x00,0x08,0x00,0x00]
+          vfmsubadd231ps 2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -2048(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0xb7,0x6a,0x80]
+          vfmsubadd231ps -2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -2064(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0xb7,0xaa,0xf0,0xf7,0xff,0xff]
+          vfmsubadd231ps -2064(%rdx), %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 508(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0xb7,0x6a,0x7f]
+          vfmsubadd231ps 508(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps 512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0xb7,0xaa,0x00,0x02,0x00,0x00]
+          vfmsubadd231ps 512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0xb7,0x6a,0x80]
+          vfmsubadd231ps -512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps -516(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0xb7,0xaa,0xfc,0xfd,0xff,0xff]
+          vfmsubadd231ps -516(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x22,0x45,0x20,0xb7,0xdc]
+          vfmsubadd231ps %ymm20, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3}
+// CHECK:  encoding: [0x62,0x22,0x45,0x23,0xb7,0xdc]
+          vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3}
+
+// CHECK: vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0x45,0xa3,0xb7,0xdc]
+          vfmsubadd231ps %ymm20, %ymm23, %ymm27 {%k3} {z}
+
+// CHECK: vfmsubadd231ps (%rcx), %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0xb7,0x19]
+          vfmsubadd231ps (%rcx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 291(%rax,%r14,8), %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x22,0x45,0x20,0xb7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd231ps 291(%rax,%r14,8), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps (%rcx){1to8}, %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0xb7,0x19]
+          vfmsubadd231ps (%rcx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 4064(%rdx), %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0xb7,0x5a,0x7f]
+          vfmsubadd231ps 4064(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 4096(%rdx), %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0xb7,0x9a,0x00,0x10,0x00,0x00]
+          vfmsubadd231ps 4096(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -4096(%rdx), %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0xb7,0x5a,0x80]
+          vfmsubadd231ps -4096(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -4128(%rdx), %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0xb7,0x9a,0xe0,0xef,0xff,0xff]
+          vfmsubadd231ps -4128(%rdx), %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 508(%rdx){1to8}, %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0xb7,0x5a,0x7f]
+          vfmsubadd231ps 508(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps 512(%rdx){1to8}, %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0xb7,0x9a,0x00,0x02,0x00,0x00]
+          vfmsubadd231ps 512(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -512(%rdx){1to8}, %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0xb7,0x5a,0x80]
+          vfmsubadd231ps -512(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231ps -516(%rdx){1to8}, %ymm23, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0xb7,0x9a,0xfc,0xfd,0xff,0xff]
+          vfmsubadd231ps -516(%rdx){1to8}, %ymm23, %ymm27
+
+// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0x82,0xbd,0x00,0xb7,0xe4]
+          vfmsubadd231pd %xmm28, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3}
+// CHECK:  encoding: [0x62,0x82,0xbd,0x03,0xb7,0xe4]
+          vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3}
+
+// CHECK: vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0xbd,0x83,0xb7,0xe4]
+          vfmsubadd231pd %xmm28, %xmm24, %xmm20 {%k3} {z}
+
+// CHECK: vfmsubadd231pd (%rcx), %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x21]
+          vfmsubadd231pd (%rcx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x00,0xb7,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd231pd 291(%rax,%r14,8), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd (%rcx){1to2}, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x21]
+          vfmsubadd231pd (%rcx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 2032(%rdx), %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x62,0x7f]
+          vfmsubadd231pd 2032(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 2048(%rdx), %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0xb7,0xa2,0x00,0x08,0x00,0x00]
+          vfmsubadd231pd 2048(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -2048(%rdx), %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0xb7,0x62,0x80]
+          vfmsubadd231pd -2048(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -2064(%rdx), %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0xb7,0xa2,0xf0,0xf7,0xff,0xff]
+          vfmsubadd231pd -2064(%rdx), %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 1016(%rdx){1to2}, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x62,0x7f]
+          vfmsubadd231pd 1016(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd 1024(%rdx){1to2}, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0xb7,0xa2,0x00,0x04,0x00,0x00]
+          vfmsubadd231pd 1024(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -1024(%rdx){1to2}, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0xb7,0x62,0x80]
+          vfmsubadd231pd -1024(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd -1032(%rdx){1to2}, %xmm24, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0xb7,0xa2,0xf8,0xfb,0xff,0xff]
+          vfmsubadd231pd -1032(%rdx){1to2}, %xmm24, %xmm20
+
+// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x22,0x95,0x20,0xb7,0xf5]
+          vfmsubadd231pd %ymm21, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x95,0x27,0xb7,0xf5]
+          vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7}
+
+// CHECK: vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x95,0xa7,0xb7,0xf5]
+          vfmsubadd231pd %ymm21, %ymm29, %ymm30 {%k7} {z}
+
+// CHECK: vfmsubadd231pd (%rcx), %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0xb7,0x31]
+          vfmsubadd231pd (%rcx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 291(%rax,%r14,8), %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x22,0x95,0x20,0xb7,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfmsubadd231pd 291(%rax,%r14,8), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd (%rcx){1to4}, %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0xb7,0x31]
+          vfmsubadd231pd (%rcx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 4064(%rdx), %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0xb7,0x72,0x7f]
+          vfmsubadd231pd 4064(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 4096(%rdx), %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0xb7,0xb2,0x00,0x10,0x00,0x00]
+          vfmsubadd231pd 4096(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -4096(%rdx), %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0xb7,0x72,0x80]
+          vfmsubadd231pd -4096(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -4128(%rdx), %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0xb7,0xb2,0xe0,0xef,0xff,0xff]
+          vfmsubadd231pd -4128(%rdx), %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 1016(%rdx){1to4}, %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0xb7,0x72,0x7f]
+          vfmsubadd231pd 1016(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd 1024(%rdx){1to4}, %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0xb7,0xb2,0x00,0x04,0x00,0x00]
+          vfmsubadd231pd 1024(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -1024(%rdx){1to4}, %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0xb7,0x72,0x80]
+          vfmsubadd231pd -1024(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfmsubadd231pd -1032(%rdx){1to4}, %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0xb7,0xb2,0xf8,0xfb,0xff,0xff]
+          vfmsubadd231pd -1032(%rdx){1to4}, %ymm29, %ymm30
+
+// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x00,0x9c,0xe2]
+          vfnmadd132ps %xmm18, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x07,0x9c,0xe2]
+          vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7}
+
+// CHECK: vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x87,0x9c,0xe2]
+          vfnmadd132ps %xmm18, %xmm26, %xmm20 {%k7} {z}
+
+// CHECK: vfnmadd132ps (%rcx), %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x21]
+          vfnmadd132ps (%rcx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 291(%rax,%r14,8), %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x00,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132ps 291(%rax,%r14,8), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps (%rcx){1to4}, %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x21]
+          vfnmadd132ps (%rcx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 2032(%rdx), %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x62,0x7f]
+          vfnmadd132ps 2032(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 2048(%rdx), %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0x9c,0xa2,0x00,0x08,0x00,0x00]
+          vfnmadd132ps 2048(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -2048(%rdx), %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0x9c,0x62,0x80]
+          vfnmadd132ps -2048(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -2064(%rdx), %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0x9c,0xa2,0xf0,0xf7,0xff,0xff]
+          vfnmadd132ps -2064(%rdx), %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 508(%rdx){1to4}, %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x62,0x7f]
+          vfnmadd132ps 508(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps 512(%rdx){1to4}, %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0x9c,0xa2,0x00,0x02,0x00,0x00]
+          vfnmadd132ps 512(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -512(%rdx){1to4}, %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0x9c,0x62,0x80]
+          vfnmadd132ps -512(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps -516(%rdx){1to4}, %xmm26, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0x9c,0xa2,0xfc,0xfd,0xff,0xff]
+          vfnmadd132ps -516(%rdx){1to4}, %xmm26, %xmm20
+
+// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x55,0x20,0x9c,0xe2]
+          vfnmadd132ps %ymm18, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x55,0x27,0x9c,0xe2]
+          vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7}
+
+// CHECK: vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x55,0xa7,0x9c,0xe2]
+          vfnmadd132ps %ymm18, %ymm21, %ymm20 {%k7} {z}
+
+// CHECK: vfnmadd132ps (%rcx), %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x20,0x9c,0x21]
+          vfnmadd132ps (%rcx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 291(%rax,%r14,8), %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x55,0x20,0x9c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132ps 291(%rax,%r14,8), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps (%rcx){1to8}, %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x30,0x9c,0x21]
+          vfnmadd132ps (%rcx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 4064(%rdx), %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x20,0x9c,0x62,0x7f]
+          vfnmadd132ps 4064(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 4096(%rdx), %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x20,0x9c,0xa2,0x00,0x10,0x00,0x00]
+          vfnmadd132ps 4096(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -4096(%rdx), %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x20,0x9c,0x62,0x80]
+          vfnmadd132ps -4096(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -4128(%rdx), %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x20,0x9c,0xa2,0xe0,0xef,0xff,0xff]
+          vfnmadd132ps -4128(%rdx), %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 508(%rdx){1to8}, %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x30,0x9c,0x62,0x7f]
+          vfnmadd132ps 508(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps 512(%rdx){1to8}, %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x30,0x9c,0xa2,0x00,0x02,0x00,0x00]
+          vfnmadd132ps 512(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -512(%rdx){1to8}, %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x30,0x9c,0x62,0x80]
+          vfnmadd132ps -512(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132ps -516(%rdx){1to8}, %ymm21, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x55,0x30,0x9c,0xa2,0xfc,0xfd,0xff,0xff]
+          vfnmadd132ps -516(%rdx){1to8}, %ymm21, %ymm20
+
+// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xd5,0x00,0x9c,0xd2]
+          vfnmadd132pd %xmm18, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6}
+// CHECK:  encoding: [0x62,0x22,0xd5,0x06,0x9c,0xd2]
+          vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6}
+
+// CHECK: vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0xd5,0x86,0x9c,0xd2]
+          vfnmadd132pd %xmm18, %xmm21, %xmm26 {%k6} {z}
+
+// CHECK: vfnmadd132pd (%rcx), %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x9c,0x11]
+          vfnmadd132pd (%rcx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 291(%rax,%r14,8), %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xd5,0x00,0x9c,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132pd 291(%rax,%r14,8), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd (%rcx){1to2}, %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x9c,0x11]
+          vfnmadd132pd (%rcx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 2032(%rdx), %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x9c,0x52,0x7f]
+          vfnmadd132pd 2032(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 2048(%rdx), %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x9c,0x92,0x00,0x08,0x00,0x00]
+          vfnmadd132pd 2048(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -2048(%rdx), %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x9c,0x52,0x80]
+          vfnmadd132pd -2048(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -2064(%rdx), %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x9c,0x92,0xf0,0xf7,0xff,0xff]
+          vfnmadd132pd -2064(%rdx), %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x9c,0x52,0x7f]
+          vfnmadd132pd 1016(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x9c,0x92,0x00,0x04,0x00,0x00]
+          vfnmadd132pd 1024(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x9c,0x52,0x80]
+          vfnmadd132pd -1024(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x9c,0x92,0xf8,0xfb,0xff,0xff]
+          vfnmadd132pd -1032(%rdx){1to2}, %xmm21, %xmm26
+
+// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x22,0xcd,0x20,0x9c,0xca]
+          vfnmadd132pd %ymm18, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4}
+// CHECK:  encoding: [0x62,0x22,0xcd,0x24,0x9c,0xca]
+          vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4}
+
+// CHECK: vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0xcd,0xa4,0x9c,0xca]
+          vfnmadd132pd %ymm18, %ymm22, %ymm25 {%k4} {z}
+
+// CHECK: vfnmadd132pd (%rcx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x9c,0x09]
+          vfnmadd132pd (%rcx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 291(%rax,%r14,8), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x22,0xcd,0x20,0x9c,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd132pd 291(%rax,%r14,8), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd (%rcx){1to4}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x9c,0x09]
+          vfnmadd132pd (%rcx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 4064(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x9c,0x4a,0x7f]
+          vfnmadd132pd 4064(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 4096(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x9c,0x8a,0x00,0x10,0x00,0x00]
+          vfnmadd132pd 4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -4096(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x9c,0x4a,0x80]
+          vfnmadd132pd -4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -4128(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x9c,0x8a,0xe0,0xef,0xff,0xff]
+          vfnmadd132pd -4128(%rdx), %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 1016(%rdx){1to4}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x9c,0x4a,0x7f]
+          vfnmadd132pd 1016(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd 1024(%rdx){1to4}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x9c,0x8a,0x00,0x04,0x00,0x00]
+          vfnmadd132pd 1024(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -1024(%rdx){1to4}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x9c,0x4a,0x80]
+          vfnmadd132pd -1024(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd132pd -1032(%rdx){1to4}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x9c,0x8a,0xf8,0xfb,0xff,0xff]
+          vfnmadd132pd -1032(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x3d,0x00,0xac,0xc4]
+          vfnmadd213ps %xmm20, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x3d,0x04,0xac,0xc4]
+          vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4}
+
+// CHECK: vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x3d,0x84,0xac,0xc4]
+          vfnmadd213ps %xmm20, %xmm24, %xmm24 {%k4} {z}
+
+// CHECK: vfnmadd213ps (%rcx), %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x00,0xac,0x01]
+          vfnmadd213ps (%rcx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 291(%rax,%r14,8), %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x3d,0x00,0xac,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213ps 291(%rax,%r14,8), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps (%rcx){1to4}, %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x10,0xac,0x01]
+          vfnmadd213ps (%rcx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 2032(%rdx), %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x00,0xac,0x42,0x7f]
+          vfnmadd213ps 2032(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 2048(%rdx), %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x00,0xac,0x82,0x00,0x08,0x00,0x00]
+          vfnmadd213ps 2048(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -2048(%rdx), %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x00,0xac,0x42,0x80]
+          vfnmadd213ps -2048(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -2064(%rdx), %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x00,0xac,0x82,0xf0,0xf7,0xff,0xff]
+          vfnmadd213ps -2064(%rdx), %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 508(%rdx){1to4}, %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x10,0xac,0x42,0x7f]
+          vfnmadd213ps 508(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps 512(%rdx){1to4}, %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x10,0xac,0x82,0x00,0x02,0x00,0x00]
+          vfnmadd213ps 512(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -512(%rdx){1to4}, %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x10,0xac,0x42,0x80]
+          vfnmadd213ps -512(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps -516(%rdx){1to4}, %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x3d,0x10,0xac,0x82,0xfc,0xfd,0xff,0xff]
+          vfnmadd213ps -516(%rdx){1to4}, %xmm24, %xmm24
+
+// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x65,0x20,0xac,0xee]
+          vfnmadd213ps %ymm22, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0x65,0x22,0xac,0xee]
+          vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2}
+
+// CHECK: vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0x65,0xa2,0xac,0xee]
+          vfnmadd213ps %ymm22, %ymm19, %ymm21 {%k2} {z}
+
+// CHECK: vfnmadd213ps (%rcx), %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x20,0xac,0x29]
+          vfnmadd213ps (%rcx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 291(%rax,%r14,8), %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x65,0x20,0xac,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213ps 291(%rax,%r14,8), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps (%rcx){1to8}, %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x30,0xac,0x29]
+          vfnmadd213ps (%rcx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 4064(%rdx), %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x20,0xac,0x6a,0x7f]
+          vfnmadd213ps 4064(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 4096(%rdx), %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x20,0xac,0xaa,0x00,0x10,0x00,0x00]
+          vfnmadd213ps 4096(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -4096(%rdx), %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x20,0xac,0x6a,0x80]
+          vfnmadd213ps -4096(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -4128(%rdx), %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x20,0xac,0xaa,0xe0,0xef,0xff,0xff]
+          vfnmadd213ps -4128(%rdx), %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 508(%rdx){1to8}, %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x30,0xac,0x6a,0x7f]
+          vfnmadd213ps 508(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps 512(%rdx){1to8}, %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x30,0xac,0xaa,0x00,0x02,0x00,0x00]
+          vfnmadd213ps 512(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -512(%rdx){1to8}, %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x30,0xac,0x6a,0x80]
+          vfnmadd213ps -512(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213ps -516(%rdx){1to8}, %ymm19, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x65,0x30,0xac,0xaa,0xfc,0xfd,0xff,0xff]
+          vfnmadd213ps -516(%rdx){1to8}, %ymm19, %ymm21
+
+// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x02,0xb5,0x00,0xac,0xc0]
+          vfnmadd213pd %xmm24, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4}
+// CHECK:  encoding: [0x62,0x02,0xb5,0x04,0xac,0xc0]
+          vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4}
+
+// CHECK: vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} {z}
+// CHECK:  encoding: [0x62,0x02,0xb5,0x84,0xac,0xc0]
+          vfnmadd213pd %xmm24, %xmm25, %xmm24 {%k4} {z}
+
+// CHECK: vfnmadd213pd (%rcx), %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xac,0x01]
+          vfnmadd213pd (%rcx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 291(%rax,%r14,8), %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x22,0xb5,0x00,0xac,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213pd 291(%rax,%r14,8), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd (%rcx){1to2}, %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xac,0x01]
+          vfnmadd213pd (%rcx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 2032(%rdx), %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xac,0x42,0x7f]
+          vfnmadd213pd 2032(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 2048(%rdx), %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xac,0x82,0x00,0x08,0x00,0x00]
+          vfnmadd213pd 2048(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -2048(%rdx), %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xac,0x42,0x80]
+          vfnmadd213pd -2048(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -2064(%rdx), %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xac,0x82,0xf0,0xf7,0xff,0xff]
+          vfnmadd213pd -2064(%rdx), %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 1016(%rdx){1to2}, %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xac,0x42,0x7f]
+          vfnmadd213pd 1016(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd 1024(%rdx){1to2}, %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xac,0x82,0x00,0x04,0x00,0x00]
+          vfnmadd213pd 1024(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -1024(%rdx){1to2}, %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xac,0x42,0x80]
+          vfnmadd213pd -1024(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd -1032(%rdx){1to2}, %xmm25, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xac,0x82,0xf8,0xfb,0xff,0xff]
+          vfnmadd213pd -1032(%rdx){1to2}, %xmm25, %xmm24
+
+// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0x82,0xa5,0x20,0xac,0xe0]
+          vfnmadd213pd %ymm24, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4}
+// CHECK:  encoding: [0x62,0x82,0xa5,0x24,0xac,0xe0]
+          vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4}
+
+// CHECK: vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} {z}
+// CHECK:  encoding: [0x62,0x82,0xa5,0xa4,0xac,0xe0]
+          vfnmadd213pd %ymm24, %ymm27, %ymm20 {%k4} {z}
+
+// CHECK: vfnmadd213pd (%rcx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x20,0xac,0x21]
+          vfnmadd213pd (%rcx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 291(%rax,%r14,8), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0xa5,0x20,0xac,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd213pd 291(%rax,%r14,8), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd (%rcx){1to4}, %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x30,0xac,0x21]
+          vfnmadd213pd (%rcx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 4064(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x20,0xac,0x62,0x7f]
+          vfnmadd213pd 4064(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 4096(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x20,0xac,0xa2,0x00,0x10,0x00,0x00]
+          vfnmadd213pd 4096(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -4096(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x20,0xac,0x62,0x80]
+          vfnmadd213pd -4096(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -4128(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x20,0xac,0xa2,0xe0,0xef,0xff,0xff]
+          vfnmadd213pd -4128(%rdx), %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 1016(%rdx){1to4}, %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x30,0xac,0x62,0x7f]
+          vfnmadd213pd 1016(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd 1024(%rdx){1to4}, %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x30,0xac,0xa2,0x00,0x04,0x00,0x00]
+          vfnmadd213pd 1024(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -1024(%rdx){1to4}, %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x30,0xac,0x62,0x80]
+          vfnmadd213pd -1024(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd213pd -1032(%rdx){1to4}, %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0xa5,0x30,0xac,0xa2,0xf8,0xfb,0xff,0xff]
+          vfnmadd213pd -1032(%rdx){1to4}, %ymm27, %ymm20
+
+// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0x82,0x2d,0x00,0xbc,0xd0]
+          vfnmadd231ps %xmm24, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x2d,0x01,0xbc,0xd0]
+          vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1}
+
+// CHECK: vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0x82,0x2d,0x81,0xbc,0xd0]
+          vfnmadd231ps %xmm24, %xmm26, %xmm18 {%k1} {z}
+
+// CHECK: vfnmadd231ps (%rcx), %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x11]
+          vfnmadd231ps (%rcx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 291(%rax,%r14,8), %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x00,0xbc,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231ps 291(%rax,%r14,8), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps (%rcx){1to4}, %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x11]
+          vfnmadd231ps (%rcx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 2032(%rdx), %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x52,0x7f]
+          vfnmadd231ps 2032(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 2048(%rdx), %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x92,0x00,0x08,0x00,0x00]
+          vfnmadd231ps 2048(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -2048(%rdx), %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x52,0x80]
+          vfnmadd231ps -2048(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -2064(%rdx), %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x00,0xbc,0x92,0xf0,0xf7,0xff,0xff]
+          vfnmadd231ps -2064(%rdx), %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 508(%rdx){1to4}, %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x52,0x7f]
+          vfnmadd231ps 508(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps 512(%rdx){1to4}, %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x92,0x00,0x02,0x00,0x00]
+          vfnmadd231ps 512(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -512(%rdx){1to4}, %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x52,0x80]
+          vfnmadd231ps -512(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps -516(%rdx){1to4}, %xmm26, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x10,0xbc,0x92,0xfc,0xfd,0xff,0xff]
+          vfnmadd231ps -516(%rdx){1to4}, %xmm26, %xmm18
+
+// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x20,0xbc,0xe5]
+          vfnmadd231ps %ymm21, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x24,0xbc,0xe5]
+          vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4}
+
+// CHECK: vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0x5d,0xa4,0xbc,0xe5]
+          vfnmadd231ps %ymm21, %ymm20, %ymm20 {%k4} {z}
+
+// CHECK: vfnmadd231ps (%rcx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x21]
+          vfnmadd231ps (%rcx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 291(%rax,%r14,8), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x5d,0x20,0xbc,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231ps 291(%rax,%r14,8), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps (%rcx){1to8}, %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x21]
+          vfnmadd231ps (%rcx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 4064(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x62,0x7f]
+          vfnmadd231ps 4064(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 4096(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0xbc,0xa2,0x00,0x10,0x00,0x00]
+          vfnmadd231ps 4096(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -4096(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0xbc,0x62,0x80]
+          vfnmadd231ps -4096(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -4128(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x20,0xbc,0xa2,0xe0,0xef,0xff,0xff]
+          vfnmadd231ps -4128(%rdx), %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 508(%rdx){1to8}, %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x62,0x7f]
+          vfnmadd231ps 508(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps 512(%rdx){1to8}, %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0xbc,0xa2,0x00,0x02,0x00,0x00]
+          vfnmadd231ps 512(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -512(%rdx){1to8}, %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0xbc,0x62,0x80]
+          vfnmadd231ps -512(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231ps -516(%rdx){1to8}, %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x5d,0x30,0xbc,0xa2,0xfc,0xfd,0xff,0xff]
+          vfnmadd231ps -516(%rdx){1to8}, %ymm20, %ymm20
+
+// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x02,0xb5,0x00,0xbc,0xea]
+          vfnmadd231pd %xmm26, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3}
+// CHECK:  encoding: [0x62,0x02,0xb5,0x03,0xbc,0xea]
+          vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3}
+
+// CHECK: vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0xb5,0x83,0xbc,0xea]
+          vfnmadd231pd %xmm26, %xmm25, %xmm29 {%k3} {z}
+
+// CHECK: vfnmadd231pd (%rcx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xbc,0x29]
+          vfnmadd231pd (%rcx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 291(%rax,%r14,8), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x22,0xb5,0x00,0xbc,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231pd 291(%rax,%r14,8), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd (%rcx){1to2}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xbc,0x29]
+          vfnmadd231pd (%rcx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 2032(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xbc,0x6a,0x7f]
+          vfnmadd231pd 2032(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 2048(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xbc,0xaa,0x00,0x08,0x00,0x00]
+          vfnmadd231pd 2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -2048(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xbc,0x6a,0x80]
+          vfnmadd231pd -2048(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -2064(%rdx), %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x00,0xbc,0xaa,0xf0,0xf7,0xff,0xff]
+          vfnmadd231pd -2064(%rdx), %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 1016(%rdx){1to2}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xbc,0x6a,0x7f]
+          vfnmadd231pd 1016(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd 1024(%rdx){1to2}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xbc,0xaa,0x00,0x04,0x00,0x00]
+          vfnmadd231pd 1024(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -1024(%rdx){1to2}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xbc,0x6a,0x80]
+          vfnmadd231pd -1024(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd -1032(%rdx){1to2}, %xmm25, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xb5,0x10,0xbc,0xaa,0xf8,0xfb,0xff,0xff]
+          vfnmadd231pd -1032(%rdx){1to2}, %xmm25, %xmm29
+
+// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0x95,0x20,0xbc,0xf7]
+          vfnmadd231pd %ymm23, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x95,0x21,0xbc,0xf7]
+          vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1}
+
+// CHECK: vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x95,0xa1,0xbc,0xf7]
+          vfnmadd231pd %ymm23, %ymm29, %ymm22 {%k1} {z}
+
+// CHECK: vfnmadd231pd (%rcx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x20,0xbc,0x31]
+          vfnmadd231pd (%rcx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 291(%rax,%r14,8), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0x95,0x20,0xbc,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmadd231pd 291(%rax,%r14,8), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd (%rcx){1to4}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x30,0xbc,0x31]
+          vfnmadd231pd (%rcx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 4064(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x20,0xbc,0x72,0x7f]
+          vfnmadd231pd 4064(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 4096(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x20,0xbc,0xb2,0x00,0x10,0x00,0x00]
+          vfnmadd231pd 4096(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -4096(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x20,0xbc,0x72,0x80]
+          vfnmadd231pd -4096(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -4128(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x20,0xbc,0xb2,0xe0,0xef,0xff,0xff]
+          vfnmadd231pd -4128(%rdx), %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 1016(%rdx){1to4}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x30,0xbc,0x72,0x7f]
+          vfnmadd231pd 1016(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd 1024(%rdx){1to4}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x30,0xbc,0xb2,0x00,0x04,0x00,0x00]
+          vfnmadd231pd 1024(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -1024(%rdx){1to4}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x30,0xbc,0x72,0x80]
+          vfnmadd231pd -1024(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmadd231pd -1032(%rdx){1to4}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x95,0x30,0xbc,0xb2,0xf8,0xfb,0xff,0xff]
+          vfnmadd231pd -1032(%rdx){1to4}, %ymm29, %ymm22
+
+// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0x82,0x35,0x00,0x9e,0xea]
+          vfnmsub132ps %xmm26, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3}
+// CHECK:  encoding: [0x62,0x82,0x35,0x03,0x9e,0xea]
+          vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3}
+
+// CHECK: vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0x35,0x83,0x9e,0xea]
+          vfnmsub132ps %xmm26, %xmm25, %xmm21 {%k3} {z}
+
+// CHECK: vfnmsub132ps (%rcx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x9e,0x29]
+          vfnmsub132ps (%rcx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 291(%rax,%r14,8), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x35,0x00,0x9e,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132ps 291(%rax,%r14,8), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps (%rcx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x9e,0x29]
+          vfnmsub132ps (%rcx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 2032(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x9e,0x6a,0x7f]
+          vfnmsub132ps 2032(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 2048(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x9e,0xaa,0x00,0x08,0x00,0x00]
+          vfnmsub132ps 2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -2048(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x9e,0x6a,0x80]
+          vfnmsub132ps -2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -2064(%rdx), %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x9e,0xaa,0xf0,0xf7,0xff,0xff]
+          vfnmsub132ps -2064(%rdx), %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 508(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x9e,0x6a,0x7f]
+          vfnmsub132ps 508(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps 512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x9e,0xaa,0x00,0x02,0x00,0x00]
+          vfnmsub132ps 512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -512(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x9e,0x6a,0x80]
+          vfnmsub132ps -512(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps -516(%rdx){1to4}, %xmm25, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x9e,0xaa,0xfc,0xfd,0xff,0xff]
+          vfnmsub132ps -516(%rdx){1to4}, %xmm25, %xmm21
+
+// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0x9e,0xd6]
+          vfnmsub132ps %ymm22, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5}
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x25,0x9e,0xd6]
+          vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5}
+
+// CHECK: vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa2,0x3d,0xa5,0x9e,0xd6]
+          vfnmsub132ps %ymm22, %ymm24, %ymm18 {%k5} {z}
+
+// CHECK: vfnmsub132ps (%rcx), %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x11]
+          vfnmsub132ps (%rcx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 291(%rax,%r14,8), %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0x9e,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132ps 291(%rax,%r14,8), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps (%rcx){1to8}, %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x11]
+          vfnmsub132ps (%rcx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 4064(%rdx), %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x52,0x7f]
+          vfnmsub132ps 4064(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 4096(%rdx), %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x92,0x00,0x10,0x00,0x00]
+          vfnmsub132ps 4096(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -4096(%rdx), %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x52,0x80]
+          vfnmsub132ps -4096(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -4128(%rdx), %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x9e,0x92,0xe0,0xef,0xff,0xff]
+          vfnmsub132ps -4128(%rdx), %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 508(%rdx){1to8}, %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x52,0x7f]
+          vfnmsub132ps 508(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps 512(%rdx){1to8}, %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x92,0x00,0x02,0x00,0x00]
+          vfnmsub132ps 512(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -512(%rdx){1to8}, %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x52,0x80]
+          vfnmsub132ps -512(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132ps -516(%rdx){1to8}, %ymm24, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x9e,0x92,0xfc,0xfd,0xff,0xff]
+          vfnmsub132ps -516(%rdx){1to8}, %ymm24, %ymm18
+
+// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x00,0x9e,0xd9]
+          vfnmsub132pd %xmm17, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x04,0x9e,0xd9]
+          vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4}
+
+// CHECK: vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x84,0x9e,0xd9]
+          vfnmsub132pd %xmm17, %xmm25, %xmm19 {%k4} {z}
+
+// CHECK: vfnmsub132pd (%rcx), %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x19]
+          vfnmsub132pd (%rcx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 291(%rax,%r14,8), %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xb5,0x00,0x9e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132pd 291(%rax,%r14,8), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd (%rcx){1to2}, %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x19]
+          vfnmsub132pd (%rcx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 2032(%rdx), %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x5a,0x7f]
+          vfnmsub132pd 2032(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 2048(%rdx), %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x9a,0x00,0x08,0x00,0x00]
+          vfnmsub132pd 2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -2048(%rdx), %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x5a,0x80]
+          vfnmsub132pd -2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -2064(%rdx), %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x9e,0x9a,0xf0,0xf7,0xff,0xff]
+          vfnmsub132pd -2064(%rdx), %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 1016(%rdx){1to2}, %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x5a,0x7f]
+          vfnmsub132pd 1016(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd 1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x9a,0x00,0x04,0x00,0x00]
+          vfnmsub132pd 1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x5a,0x80]
+          vfnmsub132pd -1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd -1032(%rdx){1to2}, %xmm25, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x9e,0x9a,0xf8,0xfb,0xff,0xff]
+          vfnmsub132pd -1032(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x20,0x9e,0xce]
+          vfnmsub132pd %ymm22, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5}
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x25,0x9e,0xce]
+          vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5}
+
+// CHECK: vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa2,0x9d,0xa5,0x9e,0xce]
+          vfnmsub132pd %ymm22, %ymm28, %ymm17 {%k5} {z}
+
+// CHECK: vfnmsub132pd (%rcx), %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x09]
+          vfnmsub132pd (%rcx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 291(%rax,%r14,8), %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xa2,0x9d,0x20,0x9e,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub132pd 291(%rax,%r14,8), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd (%rcx){1to4}, %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x09]
+          vfnmsub132pd (%rcx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 4064(%rdx), %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x4a,0x7f]
+          vfnmsub132pd 4064(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 4096(%rdx), %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x8a,0x00,0x10,0x00,0x00]
+          vfnmsub132pd 4096(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -4096(%rdx), %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x4a,0x80]
+          vfnmsub132pd -4096(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -4128(%rdx), %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x20,0x9e,0x8a,0xe0,0xef,0xff,0xff]
+          vfnmsub132pd -4128(%rdx), %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x4a,0x7f]
+          vfnmsub132pd 1016(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x8a,0x00,0x04,0x00,0x00]
+          vfnmsub132pd 1024(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x4a,0x80]
+          vfnmsub132pd -1024(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x9d,0x30,0x9e,0x8a,0xf8,0xfb,0xff,0xff]
+          vfnmsub132pd -1032(%rdx){1to4}, %ymm28, %ymm17
+
+// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x22,0x1d,0x00,0xae,0xe2]
+          vfnmsub213ps %xmm18, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x1d,0x04,0xae,0xe2]
+          vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4}
+
+// CHECK: vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x1d,0x84,0xae,0xe2]
+          vfnmsub213ps %xmm18, %xmm28, %xmm28 {%k4} {z}
+
+// CHECK: vfnmsub213ps (%rcx), %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xae,0x21]
+          vfnmsub213ps (%rcx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 291(%rax,%r14,8), %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x22,0x1d,0x00,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213ps 291(%rax,%r14,8), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps (%rcx){1to4}, %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xae,0x21]
+          vfnmsub213ps (%rcx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 2032(%rdx), %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xae,0x62,0x7f]
+          vfnmsub213ps 2032(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 2048(%rdx), %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xae,0xa2,0x00,0x08,0x00,0x00]
+          vfnmsub213ps 2048(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -2048(%rdx), %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xae,0x62,0x80]
+          vfnmsub213ps -2048(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -2064(%rdx), %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0xae,0xa2,0xf0,0xf7,0xff,0xff]
+          vfnmsub213ps -2064(%rdx), %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 508(%rdx){1to4}, %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xae,0x62,0x7f]
+          vfnmsub213ps 508(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps 512(%rdx){1to4}, %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xae,0xa2,0x00,0x02,0x00,0x00]
+          vfnmsub213ps 512(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -512(%rdx){1to4}, %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xae,0x62,0x80]
+          vfnmsub213ps -512(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps -516(%rdx){1to4}, %xmm28, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x1d,0x10,0xae,0xa2,0xfc,0xfd,0xff,0xff]
+          vfnmsub213ps -516(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x35,0x20,0xae,0xe7]
+          vfnmsub213ps %ymm23, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x35,0x21,0xae,0xe7]
+          vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1}
+
+// CHECK: vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x35,0xa1,0xae,0xe7]
+          vfnmsub213ps %ymm23, %ymm25, %ymm20 {%k1} {z}
+
+// CHECK: vfnmsub213ps (%rcx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x20,0xae,0x21]
+          vfnmsub213ps (%rcx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 291(%rax,%r14,8), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x35,0x20,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213ps 291(%rax,%r14,8), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps (%rcx){1to8}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x30,0xae,0x21]
+          vfnmsub213ps (%rcx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 4064(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x20,0xae,0x62,0x7f]
+          vfnmsub213ps 4064(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 4096(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x20,0xae,0xa2,0x00,0x10,0x00,0x00]
+          vfnmsub213ps 4096(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -4096(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x20,0xae,0x62,0x80]
+          vfnmsub213ps -4096(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -4128(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x20,0xae,0xa2,0xe0,0xef,0xff,0xff]
+          vfnmsub213ps -4128(%rdx), %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 508(%rdx){1to8}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x30,0xae,0x62,0x7f]
+          vfnmsub213ps 508(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps 512(%rdx){1to8}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x30,0xae,0xa2,0x00,0x02,0x00,0x00]
+          vfnmsub213ps 512(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -512(%rdx){1to8}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x30,0xae,0x62,0x80]
+          vfnmsub213ps -512(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213ps -516(%rdx){1to8}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x35,0x30,0xae,0xa2,0xfc,0xfd,0xff,0xff]
+          vfnmsub213ps -516(%rdx){1to8}, %ymm25, %ymm20
+
+// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0x82,0xf5,0x00,0xae,0xe1]
+          vfnmsub213pd %xmm25, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xf5,0x01,0xae,0xe1]
+          vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1}
+
+// CHECK: vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} {z}
+// CHECK:  encoding: [0x62,0x82,0xf5,0x81,0xae,0xe1]
+          vfnmsub213pd %xmm25, %xmm17, %xmm20 {%k1} {z}
+
+// CHECK: vfnmsub213pd (%rcx), %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x00,0xae,0x21]
+          vfnmsub213pd (%rcx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 291(%rax,%r14,8), %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xa2,0xf5,0x00,0xae,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213pd 291(%rax,%r14,8), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd (%rcx){1to2}, %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x10,0xae,0x21]
+          vfnmsub213pd (%rcx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 2032(%rdx), %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x00,0xae,0x62,0x7f]
+          vfnmsub213pd 2032(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 2048(%rdx), %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x00,0xae,0xa2,0x00,0x08,0x00,0x00]
+          vfnmsub213pd 2048(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -2048(%rdx), %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x00,0xae,0x62,0x80]
+          vfnmsub213pd -2048(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -2064(%rdx), %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x00,0xae,0xa2,0xf0,0xf7,0xff,0xff]
+          vfnmsub213pd -2064(%rdx), %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 1016(%rdx){1to2}, %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x10,0xae,0x62,0x7f]
+          vfnmsub213pd 1016(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd 1024(%rdx){1to2}, %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x10,0xae,0xa2,0x00,0x04,0x00,0x00]
+          vfnmsub213pd 1024(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -1024(%rdx){1to2}, %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x10,0xae,0x62,0x80]
+          vfnmsub213pd -1024(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd -1032(%rdx){1to2}, %xmm17, %xmm20
+// CHECK:  encoding: [0x62,0xe2,0xf5,0x10,0xae,0xa2,0xf8,0xfb,0xff,0xff]
+          vfnmsub213pd -1032(%rdx){1to2}, %xmm17, %xmm20
+
+// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0x82,0xdd,0x20,0xae,0xdc]
+          vfnmsub213pd %ymm28, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7}
+// CHECK:  encoding: [0x62,0x82,0xdd,0x27,0xae,0xdc]
+          vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7}
+
+// CHECK: vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0xdd,0xa7,0xae,0xdc]
+          vfnmsub213pd %ymm28, %ymm20, %ymm19 {%k7} {z}
+
+// CHECK: vfnmsub213pd (%rcx), %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x20,0xae,0x19]
+          vfnmsub213pd (%rcx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 291(%rax,%r14,8), %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0xdd,0x20,0xae,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub213pd 291(%rax,%r14,8), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd (%rcx){1to4}, %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x30,0xae,0x19]
+          vfnmsub213pd (%rcx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 4064(%rdx), %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x20,0xae,0x5a,0x7f]
+          vfnmsub213pd 4064(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 4096(%rdx), %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x20,0xae,0x9a,0x00,0x10,0x00,0x00]
+          vfnmsub213pd 4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -4096(%rdx), %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x20,0xae,0x5a,0x80]
+          vfnmsub213pd -4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -4128(%rdx), %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x20,0xae,0x9a,0xe0,0xef,0xff,0xff]
+          vfnmsub213pd -4128(%rdx), %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 1016(%rdx){1to4}, %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x30,0xae,0x5a,0x7f]
+          vfnmsub213pd 1016(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd 1024(%rdx){1to4}, %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x30,0xae,0x9a,0x00,0x04,0x00,0x00]
+          vfnmsub213pd 1024(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -1024(%rdx){1to4}, %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x30,0xae,0x5a,0x80]
+          vfnmsub213pd -1024(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub213pd -1032(%rdx){1to4}, %ymm20, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xdd,0x30,0xae,0x9a,0xf8,0xfb,0xff,0xff]
+          vfnmsub213pd -1032(%rdx){1to4}, %ymm20, %ymm19
+
+// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0x82,0x25,0x00,0xbe,0xd2]
+          vfnmsub231ps %xmm26, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x25,0x02,0xbe,0xd2]
+          vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2}
+
+// CHECK: vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x25,0x82,0xbe,0xd2]
+          vfnmsub231ps %xmm26, %xmm27, %xmm18 {%k2} {z}
+
+// CHECK: vfnmsub231ps (%rcx), %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xbe,0x11]
+          vfnmsub231ps (%rcx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 291(%rax,%r14,8), %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0x25,0x00,0xbe,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231ps 291(%rax,%r14,8), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps (%rcx){1to4}, %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xbe,0x11]
+          vfnmsub231ps (%rcx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 2032(%rdx), %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xbe,0x52,0x7f]
+          vfnmsub231ps 2032(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 2048(%rdx), %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xbe,0x92,0x00,0x08,0x00,0x00]
+          vfnmsub231ps 2048(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -2048(%rdx), %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xbe,0x52,0x80]
+          vfnmsub231ps -2048(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -2064(%rdx), %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0xbe,0x92,0xf0,0xf7,0xff,0xff]
+          vfnmsub231ps -2064(%rdx), %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 508(%rdx){1to4}, %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xbe,0x52,0x7f]
+          vfnmsub231ps 508(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps 512(%rdx){1to4}, %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xbe,0x92,0x00,0x02,0x00,0x00]
+          vfnmsub231ps 512(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -512(%rdx){1to4}, %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xbe,0x52,0x80]
+          vfnmsub231ps -512(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps -516(%rdx){1to4}, %xmm27, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0xbe,0x92,0xfc,0xfd,0xff,0xff]
+          vfnmsub231ps -516(%rdx){1to4}, %xmm27, %xmm18
+
+// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x22,0x5d,0x20,0xbe,0xf2]
+          vfnmsub231ps %ymm18, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x5d,0x21,0xbe,0xf2]
+          vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1}
+
+// CHECK: vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} {z}
+// CHECK:  encoding: [0x62,0x22,0x5d,0xa1,0xbe,0xf2]
+          vfnmsub231ps %ymm18, %ymm20, %ymm30 {%k1} {z}
+
+// CHECK: vfnmsub231ps (%rcx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0xbe,0x31]
+          vfnmsub231ps (%rcx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 291(%rax,%r14,8), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x22,0x5d,0x20,0xbe,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231ps 291(%rax,%r14,8), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps (%rcx){1to8}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x30,0xbe,0x31]
+          vfnmsub231ps (%rcx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 4064(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0xbe,0x72,0x7f]
+          vfnmsub231ps 4064(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 4096(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0xbe,0xb2,0x00,0x10,0x00,0x00]
+          vfnmsub231ps 4096(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -4096(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0xbe,0x72,0x80]
+          vfnmsub231ps -4096(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -4128(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0xbe,0xb2,0xe0,0xef,0xff,0xff]
+          vfnmsub231ps -4128(%rdx), %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 508(%rdx){1to8}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x30,0xbe,0x72,0x7f]
+          vfnmsub231ps 508(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps 512(%rdx){1to8}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x30,0xbe,0xb2,0x00,0x02,0x00,0x00]
+          vfnmsub231ps 512(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -512(%rdx){1to8}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x30,0xbe,0x72,0x80]
+          vfnmsub231ps -512(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231ps -516(%rdx){1to8}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x5d,0x30,0xbe,0xb2,0xfc,0xfd,0xff,0xff]
+          vfnmsub231ps -516(%rdx){1to8}, %ymm20, %ymm30
+
+// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0x82,0xe5,0x00,0xbe,0xf9]
+          vfnmsub231pd %xmm25, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3}
+// CHECK:  encoding: [0x62,0x82,0xe5,0x03,0xbe,0xf9]
+          vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3}
+
+// CHECK: vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} {z}
+// CHECK:  encoding: [0x62,0x82,0xe5,0x83,0xbe,0xf9]
+          vfnmsub231pd %xmm25, %xmm19, %xmm23 {%k3} {z}
+
+// CHECK: vfnmsub231pd (%rcx), %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x39]
+          vfnmsub231pd (%rcx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 291(%rax,%r14,8), %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xa2,0xe5,0x00,0xbe,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231pd 291(%rax,%r14,8), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd (%rcx){1to2}, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x39]
+          vfnmsub231pd (%rcx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 2032(%rdx), %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x7a,0x7f]
+          vfnmsub231pd 2032(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 2048(%rdx), %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xbe,0xba,0x00,0x08,0x00,0x00]
+          vfnmsub231pd 2048(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -2048(%rdx), %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xbe,0x7a,0x80]
+          vfnmsub231pd -2048(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -2064(%rdx), %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x00,0xbe,0xba,0xf0,0xf7,0xff,0xff]
+          vfnmsub231pd -2064(%rdx), %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 1016(%rdx){1to2}, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x7a,0x7f]
+          vfnmsub231pd 1016(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd 1024(%rdx){1to2}, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xbe,0xba,0x00,0x04,0x00,0x00]
+          vfnmsub231pd 1024(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -1024(%rdx){1to2}, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xbe,0x7a,0x80]
+          vfnmsub231pd -1024(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd -1032(%rdx){1to2}, %xmm19, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xe5,0x10,0xbe,0xba,0xf8,0xfb,0xff,0xff]
+          vfnmsub231pd -1032(%rdx){1to2}, %xmm19, %xmm23
+
+// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xed,0x20,0xbe,0xf4]
+          vfnmsub231pd %ymm20, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0xed,0x21,0xbe,0xf4]
+          vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1}
+
+// CHECK: vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0xed,0xa1,0xbe,0xf4]
+          vfnmsub231pd %ymm20, %ymm18, %ymm22 {%k1} {z}
+
+// CHECK: vfnmsub231pd (%rcx), %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xbe,0x31]
+          vfnmsub231pd (%rcx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 291(%rax,%r14,8), %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xed,0x20,0xbe,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vfnmsub231pd 291(%rax,%r14,8), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd (%rcx){1to4}, %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xbe,0x31]
+          vfnmsub231pd (%rcx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 4064(%rdx), %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xbe,0x72,0x7f]
+          vfnmsub231pd 4064(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 4096(%rdx), %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xbe,0xb2,0x00,0x10,0x00,0x00]
+          vfnmsub231pd 4096(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -4096(%rdx), %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xbe,0x72,0x80]
+          vfnmsub231pd -4096(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -4128(%rdx), %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x20,0xbe,0xb2,0xe0,0xef,0xff,0xff]
+          vfnmsub231pd -4128(%rdx), %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xbe,0x72,0x7f]
+          vfnmsub231pd 1016(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xbe,0xb2,0x00,0x04,0x00,0x00]
+          vfnmsub231pd 1024(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xbe,0x72,0x80]
+          vfnmsub231pd -1024(%rdx){1to4}, %ymm18, %ymm22
+
+// CHECK: vfnmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xed,0x30,0xbe,0xb2,0xf8,0xfb,0xff,0xff]
+          vfnmsub231pd -1032(%rdx){1to4}, %ymm18, %ymm22
+
 // CHECK: vpermi2d %xmm25, %xmm23, %xmm21
 // CHECK:  encoding: [0x62,0x82,0x45,0x00,0x76,0xe9]
           vpermi2d %xmm25, %xmm23, %xmm21
@@ -12028,3 +16060,227 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1
 // CHECK: vpermt2pd -1032(%rdx){1to4}, %ymm23, %ymm17
 // CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x7f,0x8a,0xf8,0xfb,0xff,0xff]
           vpermt2pd -1032(%rdx){1to4}, %ymm23, %ymm17
+
+// CHECK: vscalefpd %xmm17, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x22,0xf5,0x00,0x2c,0xc1]
+          vscalefpd %xmm17, %xmm17, %xmm24
+
+// CHECK: vscalefpd %xmm17, %xmm17, %xmm24 {%k2}
+// CHECK:  encoding: [0x62,0x22,0xf5,0x02,0x2c,0xc1]
+          vscalefpd %xmm17, %xmm17, %xmm24 {%k2}
+
+// CHECK: vscalefpd %xmm17, %xmm17, %xmm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0xf5,0x82,0x2c,0xc1]
+          vscalefpd %xmm17, %xmm17, %xmm24 {%k2} {z}
+
+// CHECK: vscalefpd (%rcx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x2c,0x01]
+          vscalefpd (%rcx), %xmm17, %xmm24
+
+// CHECK: vscalefpd 291(%rax,%r14,8), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x22,0xf5,0x00,0x2c,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vscalefpd 291(%rax,%r14,8), %xmm17, %xmm24
+
+// CHECK: vscalefpd (%rcx){1to2}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x2c,0x01]
+          vscalefpd (%rcx){1to2}, %xmm17, %xmm24
+
+// CHECK: vscalefpd 2032(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x2c,0x42,0x7f]
+          vscalefpd 2032(%rdx), %xmm17, %xmm24
+
+// CHECK: vscalefpd 2048(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x2c,0x82,0x00,0x08,0x00,0x00]
+          vscalefpd 2048(%rdx), %xmm17, %xmm24
+
+// CHECK: vscalefpd -2048(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x2c,0x42,0x80]
+          vscalefpd -2048(%rdx), %xmm17, %xmm24
+
+// CHECK: vscalefpd -2064(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x2c,0x82,0xf0,0xf7,0xff,0xff]
+          vscalefpd -2064(%rdx), %xmm17, %xmm24
+
+// CHECK: vscalefpd 1016(%rdx){1to2}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x2c,0x42,0x7f]
+          vscalefpd 1016(%rdx){1to2}, %xmm17, %xmm24
+
+// CHECK: vscalefpd 1024(%rdx){1to2}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x2c,0x82,0x00,0x04,0x00,0x00]
+          vscalefpd 1024(%rdx){1to2}, %xmm17, %xmm24
+
+// CHECK: vscalefpd -1024(%rdx){1to2}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x2c,0x42,0x80]
+          vscalefpd -1024(%rdx){1to2}, %xmm17, %xmm24
+
+// CHECK: vscalefpd -1032(%rdx){1to2}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x2c,0x82,0xf8,0xfb,0xff,0xff]
+          vscalefpd -1032(%rdx){1to2}, %xmm17, %xmm24
+
+// CHECK: vscalefpd %ymm26, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x02,0xdd,0x20,0x2c,0xea]
+          vscalefpd %ymm26, %ymm20, %ymm29
+
+// CHECK: vscalefpd %ymm26, %ymm20, %ymm29 {%k2}
+// CHECK:  encoding: [0x62,0x02,0xdd,0x22,0x2c,0xea]
+          vscalefpd %ymm26, %ymm20, %ymm29 {%k2}
+
+// CHECK: vscalefpd %ymm26, %ymm20, %ymm29 {%k2} {z}
+// CHECK:  encoding: [0x62,0x02,0xdd,0xa2,0x2c,0xea]
+          vscalefpd %ymm26, %ymm20, %ymm29 {%k2} {z}
+
+// CHECK: vscalefpd (%rcx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x2c,0x29]
+          vscalefpd (%rcx), %ymm20, %ymm29
+
+// CHECK: vscalefpd 291(%rax,%r14,8), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x22,0xdd,0x20,0x2c,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vscalefpd 291(%rax,%r14,8), %ymm20, %ymm29
+
+// CHECK: vscalefpd (%rcx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x2c,0x29]
+          vscalefpd (%rcx){1to4}, %ymm20, %ymm29
+
+// CHECK: vscalefpd 4064(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x2c,0x6a,0x7f]
+          vscalefpd 4064(%rdx), %ymm20, %ymm29
+
+// CHECK: vscalefpd 4096(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x2c,0xaa,0x00,0x10,0x00,0x00]
+          vscalefpd 4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vscalefpd -4096(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x2c,0x6a,0x80]
+          vscalefpd -4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vscalefpd -4128(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x2c,0xaa,0xe0,0xef,0xff,0xff]
+          vscalefpd -4128(%rdx), %ymm20, %ymm29
+
+// CHECK: vscalefpd 1016(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x2c,0x6a,0x7f]
+          vscalefpd 1016(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vscalefpd 1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x2c,0xaa,0x00,0x04,0x00,0x00]
+          vscalefpd 1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vscalefpd -1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x2c,0x6a,0x80]
+          vscalefpd -1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vscalefpd -1032(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x2c,0xaa,0xf8,0xfb,0xff,0xff]
+          vscalefpd -1032(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vscalefps %xmm22, %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x25,0x00,0x2c,0xde]
+          vscalefps %xmm22, %xmm27, %xmm19
+
+// CHECK: vscalefps %xmm22, %xmm27, %xmm19 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0x25,0x04,0x2c,0xde]
+          vscalefps %xmm22, %xmm27, %xmm19 {%k4}
+
+// CHECK: vscalefps %xmm22, %xmm27, %xmm19 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0x25,0x84,0x2c,0xde]
+          vscalefps %xmm22, %xmm27, %xmm19 {%k4} {z}
+
+// CHECK: vscalefps (%rcx), %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0x2c,0x19]
+          vscalefps (%rcx), %xmm27, %xmm19
+
+// CHECK: vscalefps 291(%rax,%r14,8), %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x25,0x00,0x2c,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vscalefps 291(%rax,%r14,8), %xmm27, %xmm19
+
+// CHECK: vscalefps (%rcx){1to4}, %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0x2c,0x19]
+          vscalefps (%rcx){1to4}, %xmm27, %xmm19
+
+// CHECK: vscalefps 2032(%rdx), %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0x2c,0x5a,0x7f]
+          vscalefps 2032(%rdx), %xmm27, %xmm19
+
+// CHECK: vscalefps 2048(%rdx), %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0x2c,0x9a,0x00,0x08,0x00,0x00]
+          vscalefps 2048(%rdx), %xmm27, %xmm19
+
+// CHECK: vscalefps -2048(%rdx), %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0x2c,0x5a,0x80]
+          vscalefps -2048(%rdx), %xmm27, %xmm19
+
+// CHECK: vscalefps -2064(%rdx), %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x00,0x2c,0x9a,0xf0,0xf7,0xff,0xff]
+          vscalefps -2064(%rdx), %xmm27, %xmm19
+
+// CHECK: vscalefps 508(%rdx){1to4}, %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0x2c,0x5a,0x7f]
+          vscalefps 508(%rdx){1to4}, %xmm27, %xmm19
+
+// CHECK: vscalefps 512(%rdx){1to4}, %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0x2c,0x9a,0x00,0x02,0x00,0x00]
+          vscalefps 512(%rdx){1to4}, %xmm27, %xmm19
+
+// CHECK: vscalefps -512(%rdx){1to4}, %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0x2c,0x5a,0x80]
+          vscalefps -512(%rdx){1to4}, %xmm27, %xmm19
+
+// CHECK: vscalefps -516(%rdx){1to4}, %xmm27, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x25,0x10,0x2c,0x9a,0xfc,0xfd,0xff,0xff]
+          vscalefps -516(%rdx){1to4}, %xmm27, %xmm19
+
+// CHECK: vscalefps %ymm23, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x4d,0x20,0x2c,0xcf]
+          vscalefps %ymm23, %ymm22, %ymm25
+
+// CHECK: vscalefps %ymm23, %ymm22, %ymm25 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x4d,0x24,0x2c,0xcf]
+          vscalefps %ymm23, %ymm22, %ymm25 {%k4}
+
+// CHECK: vscalefps %ymm23, %ymm22, %ymm25 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x4d,0xa4,0x2c,0xcf]
+          vscalefps %ymm23, %ymm22, %ymm25 {%k4} {z}
+
+// CHECK: vscalefps (%rcx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x2c,0x09]
+          vscalefps (%rcx), %ymm22, %ymm25
+
+// CHECK: vscalefps 291(%rax,%r14,8), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x4d,0x20,0x2c,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vscalefps 291(%rax,%r14,8), %ymm22, %ymm25
+
+// CHECK: vscalefps (%rcx){1to8}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0x2c,0x09]
+          vscalefps (%rcx){1to8}, %ymm22, %ymm25
+
+// CHECK: vscalefps 4064(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x2c,0x4a,0x7f]
+          vscalefps 4064(%rdx), %ymm22, %ymm25
+
+// CHECK: vscalefps 4096(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x2c,0x8a,0x00,0x10,0x00,0x00]
+          vscalefps 4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vscalefps -4096(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x2c,0x4a,0x80]
+          vscalefps -4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vscalefps -4128(%rdx), %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x2c,0x8a,0xe0,0xef,0xff,0xff]
+          vscalefps -4128(%rdx), %ymm22, %ymm25
+
+// CHECK: vscalefps 508(%rdx){1to8}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0x2c,0x4a,0x7f]
+          vscalefps 508(%rdx){1to8}, %ymm22, %ymm25
+
+// CHECK: vscalefps 512(%rdx){1to8}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0x00,0x02,0x00,0x00]
+          vscalefps 512(%rdx){1to8}, %ymm22, %ymm25
+
+// CHECK: vscalefps -512(%rdx){1to8}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0x2c,0x4a,0x80]
+          vscalefps -512(%rdx){1to8}, %ymm22, %ymm25
+
+// CHECK: vscalefps -516(%rdx){1to8}, %ymm22, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff]
+          vscalefps -516(%rdx){1to8}, %ymm22, %ymm25
diff --git a/test/Object/ARM/nm-mapping-symbol.s b/test/Object/ARM/nm-mapping-symbol.s
new file mode 100644
index 0000000..485c1cc
--- /dev/null
+++ b/test/Object/ARM/nm-mapping-symbol.s
@@ -0,0 +1,11 @@
+// RUN: llvm-mc %s -o %t.o -filetype=obj -triple=armv7-pc-linux
+// RUN: llvm-readobj -t %t.o | FileCheck %s
+// RUN: llvm-nm %t.o | FileCheck -allow-empty --check-prefix=NM %s
+
+// Test that nm doesn't print the mapping symbols
+
+// CHECK: Name: $d.0
+// NM-NOT: $d.0
+
+        .section        .foobar,"",%progbits
+        .asciz  "foo"
diff --git a/test/Object/Inputs/invalid-section-index.elf b/test/Object/Inputs/invalid-section-index.elf
new file mode 100644
index 0000000..a019d8a
Binary files /dev/null and b/test/Object/Inputs/invalid-section-index.elf differ
diff --git a/test/Object/Inputs/invalid-section-size.elf b/test/Object/Inputs/invalid-section-size.elf
new file mode 100644
index 0000000..c111a4c
Binary files /dev/null and b/test/Object/Inputs/invalid-section-size.elf differ
diff --git a/test/Object/Inputs/invalid-sh_entsize.elf b/test/Object/Inputs/invalid-sh_entsize.elf
new file mode 100755
index 0000000..9ea8073
Binary files /dev/null and b/test/Object/Inputs/invalid-sh_entsize.elf differ
diff --git a/test/Object/Inputs/invalid-strtab-non-null.elf b/test/Object/Inputs/invalid-strtab-non-null.elf
new file mode 100644
index 0000000..f52c0a1
Binary files /dev/null and b/test/Object/Inputs/invalid-strtab-non-null.elf differ
diff --git a/test/Object/Inputs/invalid-strtab-size.elf b/test/Object/Inputs/invalid-strtab-size.elf
new file mode 100644
index 0000000..fb19746
Binary files /dev/null and b/test/Object/Inputs/invalid-strtab-size.elf differ
diff --git a/test/Object/Inputs/invalid-strtab-type.elf b/test/Object/Inputs/invalid-strtab-type.elf
new file mode 100644
index 0000000..2a072eb
Binary files /dev/null and b/test/Object/Inputs/invalid-strtab-type.elf differ
diff --git a/test/Object/Inputs/stackmap-test.macho-x86-64 b/test/Object/Inputs/stackmap-test.macho-x86-64
new file mode 100644
index 0000000..588c5aa
Binary files /dev/null and b/test/Object/Inputs/stackmap-test.macho-x86-64 differ
diff --git a/test/Object/X86/nm-print-size.s b/test/Object/X86/nm-print-size.s
new file mode 100644
index 0000000..b83360196
--- /dev/null
+++ b/test/Object/X86/nm-print-size.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc %s -o %t -filetype=obj -triple=x86_64-pc-linux
+// RUN: llvm-nm --print-size %t | FileCheck %s
+
+// CHECK: 0000000000000000 ffffffffffffffff n a
+// CHECK: 0000000000000000 0000000000000000 N b
+
+        .section foo
+a:
+        .size a, 0xffffffffffffffff
+
+        .global b
+b:
diff --git a/test/Object/dllimport-globalref.ll b/test/Object/dllimport-globalref.ll
new file mode 100644
index 0000000..dd518bc
--- /dev/null
+++ b/test/Object/dllimport-globalref.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as %s -o - | llvm-nm - | FileCheck %s
+
+; We should technically emit an unmangled reference to f here,
+; but no existing linker needs this.
+
+; XFAIL: *
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; CHECK: U f
+
+declare dllimport void @f()
+@fp = constant void ()* @f
diff --git a/test/Object/invalid.test b/test/Object/invalid.test
index 73bf62a..1d5a70b 100644
--- a/test/Object/invalid.test
+++ b/test/Object/invalid.test
@@ -1,2 +1,46 @@
 RUN: not llvm-dwarfdump %p/Inputs/invalid-bad-rel-type.elf 2>&1 | FileCheck %s
+RUN: not llvm-objdump -s %p/Inputs/invalid-strtab-type.elf 2>&1 | FileCheck %s
+RUN: not llvm-objdump -s %p/Inputs/invalid-strtab-size.elf 2>&1 | FileCheck %s
 CHECK: Invalid data was encountered while parsing the file
+
+RUN: not llvm-objdump -s %p/Inputs/invalid-strtab-non-null.elf 2>&1 | FileCheck --check-prefix=NON-NULL %s
+NON-NULL: String table must end with a null terminator
+
+Test the sh_entsize are invalid
+RUN: llvm-readobj -s %p/Inputs/invalid-sh_entsize.elf | FileCheck --check-prefix=SECTION %s
+
+SECTION:         Name: .dynsym
+SECTION-NEXT:    Type: SHT_DYNSYM
+SECTION-NEXT:    Flags [
+SECTION-NEXT:      SHF_ALLOC
+SECTION-NEXT:    ]
+SECTION-NEXT:    Address:
+SECTION-NEXT:    Offset:
+SECTION-NEXT:    Size:
+SECTION-NEXT:    Link:
+SECTION-NEXT:    Info:
+SECTION-NEXT:    AddressAlignment:
+SECTION-NEXT:    EntrySize: 32
+
+SECTION:         Name: .symtab
+SECTION-NEXT:    Type: SHT_SYMTAB
+SECTION-NEXT:    Flags [
+SECTION-NEXT:    ]
+SECTION-NEXT:    Address:
+SECTION-NEXT:    Offset:
+SECTION-NEXT:    Size:
+SECTION-NEXT:    Link:
+SECTION-NEXT:    Info:
+SECTION-NEXT:    AddressAlignment:
+SECTION-NEXT:    EntrySize: 32
+
+RUN: not llvm-readobj -t %p/Inputs/invalid-sh_entsize.elf 2>&1 | FileCheck --check-prefix=INVALID-SYM-SIZE %s
+RUN: not llvm-readobj --dyn-symbols %p/Inputs/invalid-sh_entsize.elf 2>&1 | FileCheck --check-prefix=INVALID-SYM-SIZE %s
+INVALID-SYM-SIZE: Invalid symbol size
+
+RUN: not llvm-readobj -t %p/Inputs/invalid-section-index.elf 2>&1 | FileCheck --check-prefix=INVALID-SECTION-INDEX %s
+
+INVALID-SECTION-INDEX: Invalid section index
+
+RUN: not llvm-readobj -s %p/Inputs/invalid-section-size.elf 2>&1 | FileCheck --check-prefix=INVALID-SECTION-SIZE %s
+INVALID-SECTION-SIZE: Invalid section header entry size (e_shentsize) in ELF header
diff --git a/test/Object/lit.local.cfg b/test/Object/lit.local.cfg
index d74d039..ec8ad45 100644
--- a/test/Object/lit.local.cfg
+++ b/test/Object/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.test', '.ll', '.yaml']
+config.suffixes = ['.test', '.ll', '.s', '.yaml']
diff --git a/test/Object/objdump-symbol-table.test b/test/Object/objdump-symbol-table.test
index 3d09e1a..e66faec 100644
--- a/test/Object/objdump-symbol-table.test
+++ b/test/Object/objdump-symbol-table.test
@@ -30,9 +30,9 @@ ELF-i386: 00000000         *UND*  00000000 puts
 
 macho-i386: trivial-object-test.macho-i386:        file format Mach-O 32-bit i386
 macho-i386: SYMBOL TABLE:
-macho-i386: 00000000 g     F __TEXT,__text  00000000 _main
-macho-i386: 00000000         *UND*  00000000 _SomeOtherFunction
-macho-i386: 00000000         *UND*  00000000 _puts
+macho-i386: 00000000 g     F __TEXT,__text  _main
+macho-i386: 00000000         *UND*          _SomeOtherFunction
+macho-i386: 00000000         *UND*          _puts
 
 ELF-shared: shared-object-test.elf-i386:     file format
 ELF-shared: SYMBOL TABLE:
diff --git a/test/Object/relocation-executable.test b/test/Object/relocation-executable.test
index 1236035..38ad596 100644
--- a/test/Object/relocation-executable.test
+++ b/test/Object/relocation-executable.test
@@ -1,5 +1,7 @@
 RUN: llvm-readobj -r -expand-relocs %p/Inputs/hello-world.elf-x86-64 \
 RUN:   | FileCheck %s
+RUN: llvm-readobj -dyn-relocations -expand-relocs \
+RUN:   %p/Inputs/hello-world.elf-x86-64 | FileCheck %s --check-prefix=DYN
 
 // CHECK:     Relocations [
 // CHECK-NEXT:  Section (8) .rela.dyn {
@@ -24,3 +26,12 @@ RUN:   | FileCheck %s
 // CHECK-NEXT:      Addend: 0x0
 // CHECK-NEXT:    }
 // CHECK-NEXT:  }
+
+// DYN:      Dynamic Relocations {
+// DYN-NEXT:   Relocation {
+// DYN-NEXT:     Offset: 0x4018D8
+// DYN-NEXT:     Type: R_X86_64_GLOB_DAT (6)
+// DYN-NEXT:     Symbol: __gmon_start__
+// DYN-NEXT:     Addend: 0x0
+// DYN-NEXT:   }
+// DYN-NEXT: }
diff --git a/test/Object/stackmap-dump.test b/test/Object/stackmap-dump.test
new file mode 100644
index 0000000..71710fb
--- /dev/null
+++ b/test/Object/stackmap-dump.test
@@ -0,0 +1,16 @@
+RUN: llvm-readobj -stackmap %p/Inputs/stackmap-test.macho-x86-64 | FileCheck %s
+
+CHECK:      LLVM StackMap Version: 1
+CHECK-NEXT: Num Functions: 1
+CHECK-NEXT:   Function address: 0, stack size: 16
+CHECK-NEXT: Num Constants: 1
+CHECK-NEXT:   #1: 10000000000
+CHECK-NEXT: Num Records: 1
+CHECK-NEXT:   Record ID: 2, instruction offset: 1
+CHECK-NEXT:     5 locations:
+CHECK-NEXT:       #1: Register R#5
+CHECK-NEXT:       #2: Constant 10
+CHECK-NEXT:       #3: ConstantIndex #0 (10000000000)
+CHECK-NEXT:       #4: Direct R#4 + -8
+CHECK-NEXT:       #5: Indirect [R#6 + -16]
+CHECK-NEXT:     1 live-outs: [ R#7 (8-bytes) ]
diff --git a/test/Transforms/GVN/br-identical.ll b/test/Transforms/GVN/br-identical.ll
new file mode 100644
index 0000000..dfb7abe
--- /dev/null
+++ b/test/Transforms/GVN/br-identical.ll
@@ -0,0 +1,38 @@
+; RUN: opt -gvn -S -o - %s | FileCheck %s
+
+; If a branch has two identical successors, we cannot declare either dead.
+
+define void @widget(i1 %p) {
+entry:
+  br label %bb2
+
+bb2:
+  %t1 = phi i64 [ 0, %entry ], [ %t5, %bb7 ]
+  %t2 = add i64 %t1, 1
+  %t3 = icmp ult i64 0, %t2
+  br i1 %t3, label %bb3, label %bb4
+
+bb3:
+  %t4 = call i64 @f()
+  br label %bb4
+
+bb4:
+  ; CHECK-NOT: phi {{.*}} undef
+  %foo = phi i64 [ %t4, %bb3 ], [ 0, %bb2 ]
+  br i1 %p, label %bb5, label %bb6
+
+bb5:
+  br i1 true, label %bb7, label %bb7
+
+bb6:
+  br i1 true, label %bb7, label %bb7
+
+bb7:
+  %t5 = add i64 %t1, 1
+  br i1 %p, label %bb2, label %bb8
+
+bb8:
+  ret void
+}
+
+declare i64 @f()
diff --git a/test/Transforms/GVN/pr12979.ll b/test/Transforms/GVN/pr12979.ll
index 0198a56..919c22d 100644
--- a/test/Transforms/GVN/pr12979.ll
+++ b/test/Transforms/GVN/pr12979.ll
@@ -77,3 +77,17 @@ define i32 @test7(i32 %x, i32 %y) {
   %foo = add i32 %add1, %add2
   ret i32 %foo
 }
+
+declare void @mumble(i2, i2)
+
+define void @test8(i2 %x) {
+; CHECK-LABEL: @test8(
+; CHECK:      %[[ashr:.*]] = ashr i2 %x, 1
+; CHECK-NEXT: call void @mumble(i2 %[[ashr]], i2 %[[ashr]])
+; CHECK-NEXT: ret void
+
+  %ashr0 = ashr exact i2 %x, 1
+  %ashr1 = ashr i2 %x, 1
+  call void @mumble(i2 %ashr0, i2 %ashr1)
+  ret void
+}
diff --git a/test/Transforms/Inline/X86/inline-target-attr.ll b/test/Transforms/Inline/X86/inline-target-attr.ll
new file mode 100644
index 0000000..c59f4b4
--- /dev/null
+++ b/test/Transforms/Inline/X86/inline-target-attr.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -inline | FileCheck %s
+; Check that we only inline when we have compatible target attributes.
+; X86 has implemented a target attribute that will verify that the attribute
+; sets are compatible.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo() #0 {
+entry:
+  %call = call i32 (...) @baz()
+  ret i32 %call
+; CHECK-LABEL: foo
+; CHECK: call i32 (...) @baz()
+}
+declare i32 @baz(...) #0
+
+define i32 @bar() #1 {
+entry:
+  %call = call i32 @foo()
+  ret i32 %call
+; CHECK-LABEL: bar
+; CHECK: call i32 (...) @baz()
+}
+
+define i32 @qux() #0 {
+entry:
+  %call = call i32 @bar()
+  ret i32 %call
+; CHECK-LABEL: qux
+; CHECK: call i32 @bar()
+}
+
+attributes #0 = { "target-cpu"="x86-64" "target-features"="+sse,+sse2" }
+attributes #1 = { "target-cpu"="x86-64" "target-features"="+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3" }
diff --git a/test/Transforms/Inline/X86/lit.local.cfg b/test/Transforms/Inline/X86/lit.local.cfg
new file mode 100644
index 0000000..e71f3cc
--- /dev/null
+++ b/test/Transforms/Inline/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/Inline/nonnull.ll b/test/Transforms/Inline/nonnull.ll
new file mode 100644
index 0000000..4aa0c28
--- /dev/null
+++ b/test/Transforms/Inline/nonnull.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -inline %s | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+
+define void @callee(i8* %arg) {
+  %cmp = icmp eq i8* %arg, null
+  br i1 %cmp, label %expensive, label %done
+
+; This block is designed to be too expensive to inline.  We can only inline
+; callee if this block is known to be dead.
+expensive:
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  ret void
+
+done:
+  call void @bar()
+  ret void
+}
+
+; Positive test - arg is known non null
+define void @caller(i8* nonnull %arg) {
+; CHECK-LABEL: @caller
+; CHECK: call void @bar()
+  call void @callee(i8* nonnull %arg)
+  ret void
+}
+
+; Negative test - arg is not known to be non null
+define void @caller2(i8* %arg) {
+; CHECK-LABEL: @caller2
+; CHECK: call void @callee(
+  call void @callee(i8* %arg)
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/pr23809.ll b/test/Transforms/InstCombine/pr23809.ll
new file mode 100644
index 0000000..06c7ce2
--- /dev/null
+++ b/test/Transforms/InstCombine/pr23809.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; InstCombine should preserve the call to @llvm.assume.
+define i32 @icmp(i32 %a, i32 %b) {
+; CHECK-LABEL: @icmp(
+  %sum = add i32 %a, %b
+  %1 = icmp sge i32 %sum, 0
+  call void @llvm.assume(i1 %1)
+; CHECK: call void @llvm.assume
+  ret i32 %sum
+}
+
+define float @fcmp(float %a, float %b) {
+; CHECK-LABEL: @fcmp(
+  %sum = fadd float %a, %b
+  %1 = fcmp oge float %sum, 0.0
+  call void @llvm.assume(i1 %1)
+; CHECK: call void @llvm.assume
+  ret float %sum
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 8be2472..fdf1199 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1296,6 +1296,23 @@ entry:
   ret i32 %v
 }
 
+define i32 @test78_neg(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; The same as @test78 but we can't speculate the load because it can trap
+; if under-aligned.
+; CHECK-LABEL: @test78_neg(
+; CHECK: %p = select i1 %flag, i32* %x, i32* %y
+; CHECK-NEXT: %v = load i32, i32* %p, align 16
+; CHECK-NEXT: ret i32 %v
+entry:
+  store i32 0, i32* %x
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32, i32* %p, align 16
+  ret i32 %v
+}
+
 define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
 ; Test that we can speculate the loads around the select even when we can't
 ; fold the load completely away.
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index c76d8d0..b1c7b72 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -550,3 +550,25 @@ define i32 @test46(i32 %x, i32 %y) {
 ; CHECK-NEXT: %sub = and i32 %y, %x.not
 ; CHECK: ret i32 %sub
 }
+
+define i32 @test47(i1 %A, i32 %B, i32 %C, i32 %D) {
+  %sel0 = select i1 %A, i32 %D, i32 %B
+  %sel1 = select i1 %A, i32 %C, i32 %B
+  %sub = sub i32 %sel0, %sel1
+  ret i32 %sub
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: %[[sub:.*]] = sub i32 %D, %C
+; CHECK-NEXT: %[[sel:.*]] = select i1 %A, i32 %[[sub]], i32 0
+; CHECK-NEXT: ret i32 %[[sel]]
+}
+
+define i32 @test48(i1 %A, i32 %B, i32 %C, i32 %D) {
+  %sel0 = select i1 %A, i32 %B, i32 %D
+  %sel1 = select i1 %A, i32 %B, i32 %C
+  %sub = sub i32 %sel0, %sel1
+  ret i32 %sub
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: %[[sub:.*]] = sub i32 %D, %C
+; CHECK-NEXT: %[[sel:.*]] = select i1 %A, i32 0, i32 %[[sub]]
+; CHECK-NEXT: ret i32 %[[sel]]
+}
diff --git a/test/Transforms/LoopDistribute/basic-with-memchecks.ll b/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 4c1c1b8..fde06d3 100644
--- a/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -35,7 +35,7 @@ entry:
 ; We have two compares for each array overlap check which is a total of 10
 ; compares.
 ;
-; CHECK: for.body.ldist.memcheck:
+; CHECK: for.body.lver.memcheck:
 ; CHECK:     = icmp
 ; CHECK:     = icmp
 
@@ -52,14 +52,14 @@ entry:
 ; CHECK:     = icmp
 
 ; CHECK-NOT: = icmp
-; CHECK:     br i1 %memcheck.conflict, label %for.body.ph.ldist.nondist, label %for.body.ph.ldist1
+; CHECK:     br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph.ldist1
 
 ; The non-distributed loop that the memchecks fall back on.
 
-; CHECK: for.body.ph.ldist.nondist:
-; CHECK:     br label %for.body.ldist.nondist
-; CHECK: for.body.ldist.nondist:
-; CHECK:    br i1 %exitcond.ldist.nondist, label %for.end, label %for.body.ldist.nondist
+; CHECK: for.body.ph.lver.orig:
+; CHECK:     br label %for.body.lver.orig
+; CHECK: for.body.lver.orig:
+; CHECK:    br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig
 
 ; Verify the two distributed loops.
 
diff --git a/test/Transforms/LoopDistribute/outside-use.ll b/test/Transforms/LoopDistribute/outside-use.ll
index 546050d..7a3fe1b 100644
--- a/test/Transforms/LoopDistribute/outside-use.ll
+++ b/test/Transforms/LoopDistribute/outside-use.ll
@@ -37,7 +37,7 @@ entry:
 ; CHECK: for.body:
 ; CHECK:   %sum_add = add nuw nsw i32 %sum, %loadC
 ; CHECK: for.end:
-; CHECK:   %sum_add.ldist = phi i32 [ %sum_add, %for.body ], [ %sum_add.ldist.nondist, %for.body.ldist.nondist ]
+; CHECK:   %sum_add.lver = phi i32 [ %sum_add, %for.body ], [ %sum_add.lver.orig, %for.body.lver.orig ]
 
 for.body:                                         ; preds = %for.body, %entry
   %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index a8a2c8e..c633ae9 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -69,7 +69,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 ; CHECK-LABEL: @test2(
 ; CHECK: br i1 %cmp10,
-; CHECK: %0 = mul i64 %Size, 4
+; CHECK: %0 = shl i64 %Size, 2
 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false)
 ; CHECK-NOT: store
 }
diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll
index 5effa42..96090e8 100644
--- a/test/Transforms/LoopReroll/nonconst_lb.ll
+++ b/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -52,7 +52,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK:   %0 = add i32 %n, -1
 ; CHECK:   %1 = sub i32 %0, %m
 ; CHECK:   %2 = lshr i32 %1, 2
-; CHECK:   %3 = mul i32 %2, 4
+; CHECK:   %3 = shl i32 %2, 2
 ; CHECK:   %4 = add i32 %m, %3
 ; CHECK:   %5 = add i32 %4, 3
 ; CHECK:   br label %for.body
@@ -132,7 +132,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK:   %0 = add i32 %n, -1
 ; CHECK:   %1 = sub i32 %0, %rem
 ; CHECK:   %2 = lshr i32 %1, 2
-; CHECK:   %3 = mul i32 %2, 4
+; CHECK:   %3 = shl i32 %2, 2
 ; CHECK:   %4 = add i32 %rem, %3
 ; CHECK:   %5 = add i32 %4, 3
 ; CHECK:   br label %for.body
diff --git a/test/Transforms/LoopSimplify/single-backedge.ll b/test/Transforms/LoopSimplify/single-backedge.ll
index aedd6f2..92fbdca 100644
--- a/test/Transforms/LoopSimplify/single-backedge.ll
+++ b/test/Transforms/LoopSimplify/single-backedge.ll
@@ -5,19 +5,35 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
 ; CHECK: Loop.backedge:
 ; CHECK-NOT: br
-; CHECK: br label %Loop
+; CHECK: br label %Loop, !dbg [[BACKEDGE_LOC:![0-9]+]]
+
+; CHECK: [[BACKEDGE_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
 
 define i32 @test(i1 %C) {
 ; <label>:0
-	br label %Loop
-Loop:		; preds = %BE2, %BE1, %0
-	%IV = phi i32 [ 1, %0 ], [ %IV2, %BE1 ], [ %IV2, %BE2 ]		; <i32> [#uses=2]
-	store i32 %IV, i32* null
-	%IV2 = add i32 %IV, 2		; <i32> [#uses=2]
-	br i1 %C, label %BE1, label %BE2
-BE1:		; preds = %Loop
-	br label %Loop
-BE2:		; preds = %Loop
-	br label %Loop
+  br label %Loop, !dbg !6
+Loop: ; preds = %BE2, %BE1, %0
+  %IV = phi i32 [ 1, %0 ], [ %IV2, %BE1 ], [ %IV2, %BE2 ] ; <i32> [#uses=2]
+  store i32 %IV, i32* null, !dbg !7
+  %IV2 = add i32 %IV, 2, !dbg !8 ; <i32> [#uses=2]
+  br i1 %C, label %BE1, label %BE2, !dbg !9
+BE1:  ; preds = %Loop
+  br label %Loop, !dbg !10
+BE2:    ; preds = %n br label %Loop
+  br label %Loop, !dbg !11
 }
 
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+
+!2 = !{}
+!3 = !DISubroutineType(types: !2)
+!4 = !DIFile(filename: "atomic.cpp", directory: "/tmp")
+!5 = !DISubprogram(name: "test", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!6 = !DILocation(line: 100, column: 1, scope: !5)
+!7 = !DILocation(line: 101, column: 1, scope: !5)
+!8 = !DILocation(line: 102, column: 1, scope: !5)
+!9 = !DILocation(line: 103, column: 1, scope: !5)
+!10 = !DILocation(line: 104, column: 1, scope: !5)
+!11 = !DILocation(line: 105, column: 1, scope: !5)
diff --git a/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll b/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
index cc8226e..5923a42 100644
--- a/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
@@ -12,8 +12,8 @@ target datalayout = "n8:16:32:64"
 ; CHECK-LABEL: @test(
 ; multiplies are hoisted out of the loop
 ; CHECK: while.body.lr.ph:
-; CHECK: mul i64
-; CHECK: mul i64
+; CHECK: shl i64
+; CHECK: shl i64
 ; GEPs are ugly
 ; CHECK: while.body:
 ; CHECK: phi
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
index 7925bf0..24be0dc 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -23,7 +23,7 @@
 ; X32: add
 ; X32: add
 ; X32: add
-; X32: add
+; X32: leal
 ; X32: %for.body.3
 define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 092b274..466566e 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -6,7 +6,7 @@
 
 ; CHECK:   [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
 ; CHECK:   [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
-; CHECK:   [[r3:%[a-z0-9]+]] = mul i64 [[r2]], 2
+; CHECK:   [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
 ; CHECK:   br label %for.body
 ; CHECK: for.body:
 ; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ]
diff --git a/test/Transforms/LoopStrengthReduce/shl.ll b/test/Transforms/LoopStrengthReduce/shl.ll
new file mode 100644
index 0000000..bb9cb39
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/shl.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce -gvn -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+define void @_Z3fooPfll(float* nocapture readonly %input, i64 %n, i64 %s) {
+; CHECK-LABEL: @_Z3fooPfll(
+entry:
+  %mul = shl nsw i64 %s, 2
+; CHECK: %mul = shl i64 %s, 2
+  tail call void @_Z3bazl(i64 %mul) #2
+; CHECK-NEXT: call void @_Z3bazl(i64 %mul)
+  %cmp.5 = icmp sgt i64 %n, 0
+  br i1 %cmp.5, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.06 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds float, float* %input, i64 %i.06
+; LoopStrengthReduce should reuse %mul as the stride.
+; CHECK: getelementptr i1, i1* {{[^,]+}}, i64 %mul
+  %0 = load float, float* %arrayidx, align 4
+  tail call void @_Z3barf(float %0) #2
+  %add = add nsw i64 %i.06, %s
+  %cmp = icmp slt i64 %add, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+declare void @_Z3bazl(i64)
+
+declare void @_Z3barf(float)
diff --git a/test/Transforms/LoopUnroll/X86/mmx.ll b/test/Transforms/LoopUnroll/X86/mmx.ll
new file mode 100644
index 0000000..2c4aa08
--- /dev/null
+++ b/test/Transforms/LoopUnroll/X86/mmx.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -S -loop-unroll | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define x86_mmx @f() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %phi = phi i32 [ 1, %entry ], [ %add, %for.body ]
+  %add = add i32 %phi, 1
+  %cmp = icmp eq i32 %phi, 0
+  br i1 %cmp, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body
+  %ret = phi x86_mmx [ undef, %for.body ]
+  ; CHECK: %[[ret_unr:.*]] = phi x86_mmx [ undef,
+  ; CHECK: %[[ret_ph:.*]]  = phi x86_mmx [ undef,
+  ; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_unr]], {{.*}} ], [ %[[ret_ph]]
+  ; CHECK: ret x86_mmx %[[ret]]
+  ret x86_mmx %ret
+}
+
+attributes #0 = { "target-cpu"="x86-64" }
diff --git a/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll
new file mode 100644
index 0000000..d536da1
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll
@@ -0,0 +1,77 @@
+;RUN: opt  -loop-unswitch -simplifycfg -S < %s | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) {
+;CHECK-LABEL: foo
+entry:
+  br label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %cmp0 = icmp sgt i32 %b, 0
+  br i1 %cmp0, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %for.inc, %for.body.lr.ph
+  %inc.i = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %mul.i = phi i32 [ 3, %for.body.lr.ph ], [ %mul.p, %for.inc ]
+  %add.i = phi i32 [ %a, %for.body.lr.ph ], [ %add.p, %for.inc ]
+  %cmp1 = icmp eq i32 %a, 12345
+  br i1 %cmp1, label %if.then, label %if.else, !prof !0
+; CHECK: %cmp1 = icmp eq i32 %a, 12345
+; CHECK-NEXT: br i1 %cmp1, label %if.then.us, label %if.else, !prof !0
+if.then:                                          ; preds = %for.body
+; CHECK: if.then.us:
+; CHECK: add nsw i32 %{{.*}}, 123
+; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b
+; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %if.then.us
+  %add = add nsw i32 %add.i, 123
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %mul = mul nsw i32 %mul.i, %b
+  br label %for.inc
+; CHECK: if.else:
+; CHECK: %mul = mul nsw i32 %mul.i, %b
+; CHECK: %inc = add nuw nsw i32 %inc.i, 1
+; CHECK: %exitcond = icmp eq i32 %inc, %b
+; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %if.else
+for.inc:                                          ; preds = %if.then, %if.else
+  %mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ]
+  %add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ]
+  %inc = add nuw nsw i32 %inc.i, 1
+  %exitcond = icmp eq i32 %inc, %b
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc, %for.body.lr.ph
+  %t2 = phi i32 [ %b, %for.body.lr.ph ], [ %mul.p, %for.inc ]
+  %t1 = phi i32 [ %a, %for.body.lr.ph ], [ %add.p, %for.inc ]
+  %add3 = add nsw i32 %t2, %t1
+  ret i32 %add3
+}
+
+define void @foo_swapped(i32 %a, i32 %b) {
+;CHECK-LABEL: foo_swapped
+entry:
+  br label %for.body
+;CHECK: entry:
+;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2
+;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1
+;CHECK: for.body:
+for.body:                                         ; preds = %for.inc, %entry
+  %inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ]
+  %add.i = phi i32 [ 100, %entry ], [ %add, %if.then ]
+  %inc = add nuw nsw i32 %inc.i, 1
+  %cmp1 = icmp eq i32 1, 2
+  br i1 %cmp1, label %if.then, label  %for.cond.cleanup, !prof !0
+
+if.then:                                          ; preds = %for.body
+  %add = add nsw i32 %a, %add.i
+
+  %exitcond = icmp eq i32 %inc, %b
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc, %for.body.lr.ph, %for.body
+  ret void
+}
+!0 = !{!"branch_weights", i32 64, i32 4}
+
+;CHECK: !0 = !{!"branch_weights", i32 64, i32 4}
+;CHECK: !1 = !{!"branch_weights", i32 4, i32 64}
diff --git a/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll b/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
new file mode 100644
index 0000000..13ceaef
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
@@ -0,0 +1,20 @@
+; RUN: opt -loop-vectorize -S %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i128 %p1) {
+entry:
+  br label %while.body
+
+while.body:
+  %p.05 = phi i8* [ %add.ptr, %while.body ], [ null, %entry ]
+  %p1.addr.04 = phi i128 [ %sub, %while.body ], [ %p1, %entry ]
+  %add.ptr = getelementptr inbounds i8, i8* %p.05, i32 2
+  %sub = add nsw i128 %p1.addr.04, -2
+  %tobool = icmp eq i128 %sub, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/optsize.ll b/test/Transforms/LoopVectorize/optsize.ll
new file mode 100644
index 0000000..e183fda
--- /dev/null
+++ b/test/Transforms/LoopVectorize/optsize.ll
@@ -0,0 +1,34 @@
+; This test verifies that the loop vectorizer will NOT produce a tail
+; loop with Optimize for size attibute.
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -Os -debug -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+
+;CHECK-NOT: <2 x i8>
+;CHECK-NOT: <4 x i8>
+;CHECK: Aborting. A tail loop is required in Os.
+
+target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
+
+@tab = common global [32 x i8] zeroinitializer, align 1
+
+; Function Attrs: nounwind optsize
+define i32 @foo() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %i.08, 202
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index b1c5d40..1f07d3f 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -9,30 +9,31 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;     a[i] = b[i] * 3;
 ; }
 
+;CHECK-LABEL: define i32 @foo
 ;CHECK: for.body.preheader:
-;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck
+;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
 ;CHECK: vector.memcheck:
-;CHECK: br i1 %memcheck.conflict, label %middle.block, label %vector.ph
+;CHECK: br i1 %memcheck.conflict, label %middle.block, label %vector.ph, !dbg [[BODY_LOC]]
 ;CHECK: load <4 x float>
 define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
 entry:
-  %cmp6 = icmp sgt i32 %n, 0
-  br i1 %cmp6, label %for.body, label %for.end
+  %cmp6 = icmp sgt i32 %n, 0, !dbg !6
+  br i1 %cmp6, label %for.body, label %for.end, !dbg !6
 
 for.body:                                         ; preds = %entry, %for.body
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float, float* %arrayidx, align 4
-  %mul = fmul float %0, 3.000000e+00
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx2, align 4
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %n
-  br i1 %exitcond, label %for.end, label %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ], !dbg !7
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv, !dbg !7
+  %0 = load float, float* %arrayidx, align 4, !dbg !7
+  %mul = fmul float %0, 3.000000e+00, !dbg !7
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv, !dbg !7
+  store float %mul, float* %arrayidx2, align 4, !dbg !7
+  %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !7
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !7
+  %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !7
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !7
 
 for.end:                                          ; preds = %for.body, %entry
-  ret i32 undef
+  ret i32 undef, !dbg !8
 }
 
 ; Make sure that we try to vectorize loops with a runtime check if the
@@ -62,3 +63,17 @@ for.body:
 loopexit:
   ret void
 }
+
+; CHECK: [[BODY_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
+
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+
+!2 = !{}
+!3 = !DISubroutineType(types: !2)
+!4 = !DIFile(filename: "test.cpp", directory: "/tmp")
+!5 = !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!6 = !DILocation(line: 100, column: 1, scope: !5)
+!7 = !DILocation(line: 101, column: 1, scope: !5)
+!8 = !DILocation(line: 102, column: 1, scope: !5)
diff --git a/test/Transforms/LowerBitSets/nonglobal.ll b/test/Transforms/LowerBitSets/nonglobal.ll
new file mode 100644
index 0000000..7591e31
--- /dev/null
+++ b/test/Transforms/LowerBitSets/nonglobal.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK-NOT: @b = alias
+@a = constant i32 1
+@b = constant [2 x i32] [i32 2, i32 3]
+
+!0 = !{!"bitset1", i32* @a, i32 0}
+!1 = !{!"bitset1", i32* bitcast ([2 x i32]* @b to i32*), i32 0}
+
+!llvm.bitsets = !{ !0, !1 }
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+  %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
+  ret i1 %x
+}
diff --git a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
index d08c6f6..92fbd20 100644
--- a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
+++ b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
@@ -61,6 +61,40 @@ define void @reassociate_gep_nsw(float* %a, i32 %i, i32 %j) {
   ret void
 }
 
+; assume(j >= 0);
+; foo(&a[zext(j)]);
+; assume(i + j >= 0);
+; foo(&a[zext(i + j)]);
+;   =>
+; t1 = &a[zext(j)];
+; foo(t1);
+; t2 = t1 + sext(i);
+; foo(t2);
+define void @reassociate_gep_assume(float* %a, i32 %i, i32 %j) {
+; CHECK-LABEL: @reassociate_gep_assume(
+  ; assume(j >= 0)
+  %cmp = icmp sgt i32 %j, -1
+  call void @llvm.assume(i1 %cmp)
+  %1 = add i32 %i, %j
+  %cmp2 = icmp sgt i32 %1, -1
+  call void @llvm.assume(i1 %cmp2)
+
+  %idxprom.j = zext i32 %j to i64
+  %2 = getelementptr float, float* %a, i64 %idxprom.j
+; CHECK: [[t1:[^ ]+]] = getelementptr float, float* %a, i64 %idxprom.j
+  call void @foo(float* %2)
+; CHECK: call void @foo(float* [[t1]])
+
+  %idxprom.1 = zext i32 %1 to i64
+  %3 = getelementptr float, float* %a, i64 %idxprom.1
+; CHECK: [[sexti:[^ ]+]] = sext i32 %i to i64
+; CHECK: [[t2:[^ ]+]] = getelementptr float, float* [[t1]], i64 [[sexti]]
+  call void @foo(float* %3)
+; CHECK: call void @foo(float* [[t2]])
+
+  ret void
+}
+
 ; Do not split the second GEP because sext(i + j) != sext(i) + sext(j).
 define void @reassociate_gep_no_nsw(float* %a, i32 %i, i32 %j) {
 ; CHECK-LABEL: @reassociate_gep_no_nsw(
@@ -88,3 +122,5 @@ define void @reassociate_gep_128(float* %a, i128 %i, i128 %j) {
 ; CHECK: call void @foo(float* [[t2]])
   ret void
 }
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/PruneEH/pr23971.ll b/test/Transforms/PruneEH/pr23971.ll
new file mode 100644
index 0000000..8a8a591
--- /dev/null
+++ b/test/Transforms/PruneEH/pr23971.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -prune-eh < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f() #0 {
+entry:
+  call void asm sideeffect "ret\0A\09", "~{dirflag},~{fpsr},~{flags}"()
+  unreachable
+}
+
+define i32 @g() {
+entry:
+  call void @f()
+  ret i32 42
+}
+
+; CHECK-LABEL: define i32 @g()
+; CHECK: ret i32 42
+
+attributes #0 = { naked noinline }
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index caaf772..c557017 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -169,7 +169,11 @@ define i32 @test11(i32 %W) {
 ; CHECK-NEXT: ret i32
 }
 
+declare void @mumble(i32)
+
 define i32 @test12(i32 %X) {
+  %X.neg = sub nsw nuw i32 0, %X
+  call void @mumble(i32 %X.neg)
   %A = sub i32 1, %X
   %B = sub i32 2, %X
   %C = sub i32 3, %X
@@ -177,8 +181,8 @@ define i32 @test12(i32 %X) {
   %Z = add i32 %Y, %C
   ret i32 %Z
 ; CHECK-LABEL: @test12
-; CHECK-NEXT: mul i32 %X, -3
-; CHECK-NEXT: add i32{{.*}}, 6
+; CHECK: %[[mul:.*]] = mul i32 %X, -3
+; CHECK-NEXT: add i32 %[[mul]], 6
 ; CHECK-NEXT: ret i32
 }
 
diff --git a/test/Transforms/Reassociate/wrap-flags.ll b/test/Transforms/Reassociate/wrap-flags.ll
index e3304b6..f56719d 100644
--- a/test/Transforms/Reassociate/wrap-flags.ll
+++ b/test/Transforms/Reassociate/wrap-flags.ll
@@ -32,3 +32,14 @@ entry:
   %mul2 = add i32 %mul, 1
   ret i32 %mul2
 }
+
+; CHECK-LABEL: @pr23926(
+; CHECK:       %[[X1_neg:.*]] = sub i2 0, %X1
+; CHECK-NEXT:  %[[sub_one:.*]] = add i2 %[[X1_neg]], -1
+; CHECK-NEXT:  %[[add:.*]] = add i2 %[[sub_one]], %X2
+; CHECK-NEXT:  ret i2 %[[add]]
+define i2 @pr23926(i2 %X1, i2 %X2) {
+  %add = add nuw i2 %X1, 1
+  %sub = sub nuw nsw i2 %X2, %add
+  ret i2 %sub
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
index 0a4456a..26ad737 100644
--- a/test/Transforms/RewriteStatepointsForGC/live-vector.ll
+++ b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
@@ -105,8 +105,6 @@ define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p)
 ; CHECK-NEXT: bitcast
 ; CHECK-NEXT: gc.relocate
 ; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
 ; CHECK-NEXT: insertelement
 ; CHECK-NEXT: insertelement
 ; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
@@ -116,6 +114,48 @@ entry:
   ret <2 x i64 addrspace(1)*> %vec
 }
 
+
+; A base vector from a load
+define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) 
+    gc "statepoint-example" {
+; CHECK-LABEL: test6
+; CHECK-LABEL: merge:
+; CHECK-NEXT: = phi
+; CHECK-NEXT: = phi
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+entry:
+  br i1 %cnd, label %taken, label %untaken
+taken:
+  %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+  br label %merge
+untaken:
+  %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+  br label %merge
+
+merge:
+  %obj = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
+  %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+  ret <2 x i64 addrspace(1)*> %obj
+}
+
+
 declare void @do_safepoint()
 
 declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/SCCP/crash.ll b/test/Transforms/SCCP/crash.ll
index 8852890..3ec1fd2 100644
--- a/test/Transforms/SCCP/crash.ll
+++ b/test/Transforms/SCCP/crash.ll
@@ -27,3 +27,8 @@ define i32 @test2([4 x i32] %A) {
   %B = extractvalue [4 x i32] %A, 1
   ret i32 %B
 }
+
+define x86_mmx @test3() {
+  %load = load x86_mmx, x86_mmx* null
+  ret x86_mmx %load
+}
diff --git a/test/Transforms/SafeStack/no-attr.ll b/test/Transforms/SafeStack/no-attr.ll
index ca3c21a..d9bcefd 100644
--- a/test/Transforms/SafeStack/no-attr.ll
+++ b/test/Transforms/SafeStack/no-attr.ll
@@ -6,6 +6,8 @@
 ; no safestack attribute
 ; Requires no protector.
 
+; CHECK-NOT: __safestack_unsafe_stack_ptr
+
 ; CHECK: @foo
 define void @foo(i8* %a) nounwind uwtable {
 entry:
diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll b/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll
new file mode 100644
index 0000000..f587a93
--- /dev/null
+++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -slsr -S | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+target triple = "amdgcn--"
+
+%struct.Matrix4x4 = type { [4 x [4 x float]] }
+
+; Function Attrs: nounwind
+define fastcc void @Accelerator_Intersect(%struct.Matrix4x4 addrspace(1)* nocapture readonly %leafTransformations) #0 {
+; CHECK-LABEL:  @Accelerator_Intersect(
+entry:
+  %tmp = sext i32 undef to i64
+  %arrayidx114 = getelementptr inbounds %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp
+  %tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0
+; CHECK: %tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0
+  %tmp2 = load <4 x float>, <4 x float> addrspace(1)* undef, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "target-cpu"="tahiti" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/tools/gold/slp-vectorize.ll b/test/tools/gold/slp-vectorize.ll
index a75f0b3..30950b2 100644
--- a/test/tools/gold/slp-vectorize.ll
+++ b/test/tools/gold/slp-vectorize.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as %s -o %t.o
 
-; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
 ; RUN:    --plugin-opt=save-temps \
 ; RUN:    -shared %t.o -o %t2.o
 ; RUN: llvm-dis %t2.o.opt.bc -o - | FileCheck %s
@@ -8,7 +8,7 @@
 ; test that the vectorizer is run.
 ; CHECK: fadd <4 x float>
 
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 define void @f(float* nocapture %x) {
   %tmp = load float, float* %x, align 4
diff --git a/test/tools/llvm-cxxdump/X86/lit.local.cfg b/test/tools/llvm-cxxdump/X86/lit.local.cfg
new file mode 100644
index 0000000..c8625f4
--- /dev/null
+++ b/test/tools/llvm-cxxdump/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
diff --git a/test/tools/llvm-cxxdump/X86/sym-size.s b/test/tools/llvm-cxxdump/X86/sym-size.s
new file mode 100644
index 0000000..c252752
--- /dev/null
+++ b/test/tools/llvm-cxxdump/X86/sym-size.s
@@ -0,0 +1,47 @@
+// RUN: llvm-mc %s -o %t -filetype=obj -triple=x86_64-pc-win32
+// RUN: llvm-cxxdump %t | FileCheck %s
+
+// CHECK:      ??_8B@@7B@[0]: 8
+// CHECK-NEXT: ??_8B@@7B@[4]: 9
+// CHECK-NEXT: ??_8C@@7B@[0]: 10
+// CHECK-NEXT: ??_8C@@7B@[4]: 11
+// CHECK-NEXT: ??_8D@@7B0@@[0]: 0
+// CHECK-NEXT: ??_8D@@7B0@@[4]: 1
+// CHECK-NEXT: ??_8D@@7B0@@[8]: 2
+// CHECK-NEXT: ??_8D@@7B0@@[12]: 3
+// CHECK-NEXT: ??_8D@@7BB@@@[0]: 4
+// CHECK-NEXT: ??_8D@@7BB@@@[4]: 5
+// CHECK-NEXT: ??_8D@@7BC@@@[0]: 6
+// CHECK-NEXT: ??_8D@@7BC@@@[4]: 7
+// CHECK-NEXT: ??_8XYZ[0]: 10
+// CHECK-NEXT: ??_8XYZ[4]: 11
+
+	.section	.rdata,"dr"
+	.globl	"??_8D@@7B0@@"
+"??_8D@@7B0@@":
+	.long	0
+	.long	1
+	.long	2
+	.long	3
+
+	.globl	"??_8D@@7BB@@@"
+"??_8D@@7BB@@@":
+	.long	4
+	.long	5
+
+	.globl	"??_8D@@7BC@@@"
+"??_8D@@7BC@@@":
+	.long	6
+	.long	7
+
+	.globl	"??_8B@@7B@"
+"??_8B@@7B@":
+	.long	8
+	.long	9
+
+	.globl	"??_8C@@7B@"
+"??_8C@@7B@":
+	.long	10
+	.long	11
+
+"??_8XYZ" = "??_8C@@7B@"
diff --git a/test/tools/llvm-objdump/X86/macho-symbol-table.test b/test/tools/llvm-objdump/X86/macho-symbol-table.test
index 3fe5aea..826d78a 100644
--- a/test/tools/llvm-objdump/X86/macho-symbol-table.test
+++ b/test/tools/llvm-objdump/X86/macho-symbol-table.test
@@ -1,8 +1,8 @@
 RUN: llvm-objdump -macho -t %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s
 
 CHECK: SYMBOL TABLE:
-CHECK: 000000000000003b l     F __TEXT,__cstring	00000000 L_.str
-CHECK: 0000000000000068 l     F __TEXT,__eh_frame	00000000 EH_frame0
-CHECK: 0000000000000000 g     F __TEXT,__text	00000000 _main
-CHECK: 0000000000000080 g     F __TEXT,__eh_frame	00000000 _main.eh
-CHECK: 0000000000000000         *UND*	00000000 _printf
+CHECK: 000000000000003b l     F __TEXT,__cstring	L_.str
+CHECK: 0000000000000068 l     F __TEXT,__eh_frame	EH_frame0
+CHECK: 0000000000000000 g     F __TEXT,__text	        _main
+CHECK: 0000000000000080 g     F __TEXT,__eh_frame	_main.eh
+CHECK: 0000000000000000         *UND*	                _printf
diff --git a/test/tools/llvm-symbolizer/Inputs/fat.c b/test/tools/llvm-symbolizer/Inputs/fat.c
new file mode 100644
index 0000000..0331c09
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/fat.c
@@ -0,0 +1,15 @@
+/* Compile with:
+   clang -arch armv7 -arch armv7m -arch armv7em -arch x86_64 -arch x86_64h -c
+*/
+
+#ifdef __x86_64h__
+void x86_64h_function() {}
+#elif defined(__x86_64__)
+void x86_64_function() {}
+#elif defined(__ARM_ARCH_7EM__)
+void armv7em_function() {}
+#elif defined(__ARM_ARCH_7M__)
+void armv7m_function() {}
+#elif defined(__ARM_ARCH_7A__)
+void armv7_function() {}
+#endif
diff --git a/test/tools/llvm-symbolizer/Inputs/fat.o b/test/tools/llvm-symbolizer/Inputs/fat.o
new file mode 100644
index 0000000..947cfc2
Binary files /dev/null and b/test/tools/llvm-symbolizer/Inputs/fat.o differ
diff --git a/test/tools/llvm-symbolizer/fat.test b/test/tools/llvm-symbolizer/fat.test
new file mode 100644
index 0000000..1ecd1ab
--- /dev/null
+++ b/test/tools/llvm-symbolizer/fat.test
@@ -0,0 +1,11 @@
+RUN: echo 0 | llvm-symbolizer -obj=%p/Inputs/fat.o -default-arch=x86_64 | FileCheck --check-prefix=X86_64 %s
+RUN: echo 0 | llvm-symbolizer -obj=%p/Inputs/fat.o -default-arch=x86_64h | FileCheck --check-prefix=X86_64H %s
+RUN: echo 0 | llvm-symbolizer -obj=%p/Inputs/fat.o -default-arch=armv7 | FileCheck --check-prefix=ARMV7 %s
+RUN: echo 0 | llvm-symbolizer -obj=%p/Inputs/fat.o -default-arch=armv7em | FileCheck --check-prefix=ARMV7EM %s
+RUN: echo 0 | llvm-symbolizer -obj=%p/Inputs/fat.o -default-arch=armv7m | FileCheck --check-prefix=ARMV7M %s
+
+X86_64: x86_64_function
+X86_64H: x86_64h_function
+ARMV7: armv7_function
+ARMV7EM: armv7em_function
+ARMV7M: armv7m_function
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 7859b49..c9c5a1f 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -75,14 +75,11 @@ add_llvm_tool_subdirectory(gold)
 
 add_llvm_external_project(clang)
 add_llvm_external_project(llgo)
+add_llvm_external_project(lld)
+add_llvm_external_project(lldb)
 
-if( NOT LLVM_INCLUDE_TOOLS STREQUAL "bootstrap-only" )
-  add_llvm_external_project(lld)
-  add_llvm_external_project(lldb)
-
-  # Automatically add remaining sub-directories containing a 'CMakeLists.txt'
-  # file as external projects.
-  add_llvm_implicit_external_projects()
-endif()
+# Automatically add remaining sub-directories containing a 'CMakeLists.txt'
+# file as external projects.
+add_llvm_implicit_external_projects()
 
 set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)
diff --git a/tools/dsymutil/DebugMap.cpp b/tools/dsymutil/DebugMap.cpp
index 1a81848..cc7c0dc 100644
--- a/tools/dsymutil/DebugMap.cpp
+++ b/tools/dsymutil/DebugMap.cpp
@@ -216,11 +216,13 @@ MappingTraits<dsymutil::DebugMapObject>::YamlDMO::denormalize(IO &IO) {
     // during the test, we can't hardcode the symbols addresses, so
     // look them up here and rewrite them.
     for (const auto &Sym : ErrOrObjectFile->symbols()) {
-      StringRef Name;
       uint64_t Address;
-      if (Sym.getName(Name) || Sym.getAddress(Address))
+      if (Sym.getAddress(Address))
         continue;
-      SymbolAddresses[Name] = Address;
+      ErrorOr<StringRef> Name = Sym.getName();
+      if (!Name)
+        continue;
+      SymbolAddresses[*Name] = Address;
     }
   }
 
diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp
index 052c1da..6e9087c 100644
--- a/tools/dsymutil/DwarfLinker.cpp
+++ b/tools/dsymutil/DwarfLinker.cpp
@@ -60,33 +60,23 @@ using HalfOpenIntervalMap =
 
 typedef HalfOpenIntervalMap<uint64_t, int64_t> FunctionIntervals;
 
-// FIXME: Delete this structure once DIE::Values has a stable iterator we can
-// use instead.
+// FIXME: Delete this structure.
 struct PatchLocation {
-  DIE *Die;
-  unsigned Index;
+  DIE::value_iterator I;
 
-  PatchLocation() : Die(nullptr), Index(0) {}
-  PatchLocation(DIE &Die, unsigned Index) : Die(&Die), Index(Index) {}
-  PatchLocation(DIE &Die)
-      : Die(&Die), Index(std::distance(Die.values_begin(), Die.values_end())) {}
+  PatchLocation() = default;
+  PatchLocation(DIE::value_iterator I) : I(I) {}
 
   void set(uint64_t New) const {
-    assert(Die);
-    assert((signed)Index <
-           std::distance(Die->values_begin(), Die->values_end()));
-    const auto &Old = Die->values_begin()[Index];
+    assert(I);
+    const auto &Old = *I;
     assert(Old.getType() == DIEValue::isInteger);
-    Die->setValue(Index,
-                  DIEValue(Old.getAttribute(), Old.getForm(), DIEInteger(New)));
+    *I = DIEValue(Old.getAttribute(), Old.getForm(), DIEInteger(New));
   }
 
   uint64_t get() const {
-    assert(Die);
-    assert((signed)Index <
-           std::distance(Die->values_begin(), Die->values_end()));
-    assert(Die->values_begin()[Index].getType() == DIEValue::isInteger);
-    return Die->values_begin()[Index].getDIEInteger().getValue();
+    assert(I);
+    return I->getDIEInteger().getValue();
   }
 };
 
@@ -123,8 +113,8 @@ public:
 
   unsigned getUniqueID() const { return ID; }
 
-  DIE *getOutputUnitDIE() const { return CUDie.get(); }
-  void setOutputUnitDIE(DIE *Die) { CUDie.reset(Die); }
+  DIE *getOutputUnitDIE() const { return CUDie; }
+  void setOutputUnitDIE(DIE *Die) { CUDie = Die; }
 
   DIEInfo &getInfo(unsigned Idx) { return Info[Idx]; }
   const DIEInfo &getInfo(unsigned Idx) const { return Info[Idx]; }
@@ -204,7 +194,7 @@ private:
   DWARFUnit &OrigUnit;
   unsigned ID;
   std::vector<DIEInfo> Info;  ///< DIE info indexed by DIE index.
-  std::unique_ptr<DIE> CUDie; ///< Root of the linked DIE tree.
+  DIE *CUDie;                 ///< Root of the linked DIE tree.
 
   uint64_t StartOffset;
   uint64_t NextUnitOffset;
@@ -1437,10 +1427,10 @@ void DwarfLinker::endDebugObject() {
   ValidRelocs.clear();
   Ranges.clear();
 
-  for (auto *Block : DIEBlocks)
-    Block->~DIEBlock();
-  for (auto *Loc : DIELocs)
-    Loc->~DIELoc();
+  for (auto I = DIEBlocks.begin(), E = DIEBlocks.end(); I != E; ++I)
+    (*I)->~DIEBlock();
+  for (auto I = DIELocs.begin(), E = DIELocs.end(); I != E; ++I)
+    (*I)->~DIELoc();
 
   DIEBlocks.clear();
   DIELocs.clear();
@@ -1461,8 +1451,8 @@ void DwarfLinker::findValidRelocsMachO(const object::SectionRef &Section,
     object::DataRefImpl RelocDataRef = Reloc.getRawDataRefImpl();
     MachO::any_relocation_info MachOReloc = Obj.getRelocation(RelocDataRef);
     unsigned RelocSize = 1 << Obj.getAnyRelocationLength(MachOReloc);
-    uint64_t Offset64;
-    if ((RelocSize != 4 && RelocSize != 8) || Reloc.getOffset(Offset64)) {
+    uint64_t Offset64 = Reloc.getOffset();
+    if ((RelocSize != 4 && RelocSize != 8)) {
       reportWarning(" unsupported relocation in debug_info section.");
       continue;
     }
@@ -1472,12 +1462,12 @@ void DwarfLinker::findValidRelocsMachO(const object::SectionRef &Section,
 
     auto Sym = Reloc.getSymbol();
     if (Sym != Obj.symbol_end()) {
-      StringRef SymbolName;
-      if (Sym->getName(SymbolName)) {
+      ErrorOr<StringRef> SymbolName = Sym->getName();
+      if (!SymbolName) {
         reportWarning("error getting relocation symbol name.");
         continue;
       }
-      if (const auto *Mapping = DMO.lookupSymbol(SymbolName))
+      if (const auto *Mapping = DMO.lookupSymbol(*SymbolName))
         ValidRelocs.emplace_back(Offset64, RelocSize, Addend, Mapping);
     } else if (const auto *Mapping = DMO.lookupObjectAddress(Addend)) {
       // Do not store the addend. The addend was the address of the
@@ -1837,7 +1827,7 @@ unsigned DwarfLinker::cloneStringAttribute(DIE &Die, AttributeSpec AttrSpec,
   // Switch everything to out of line strings.
   const char *String = *Val.getAsCString(&U);
   unsigned Offset = StringPool.getStringOffset(String);
-  Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_strp,
+  Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_strp,
                DIEInteger(Offset));
   return 4;
 }
@@ -1871,7 +1861,7 @@ unsigned DwarfLinker::cloneDieReferenceAttribute(
     assert(Ref > InputDIE.getOffset());
     // We haven't cloned this DIE yet. Just create an empty one and
     // store it. It'll get really cloned when we process it.
-    RefInfo.Clone = new DIE(dwarf::Tag(RefDie->getTag()));
+    RefInfo.Clone = DIE::get(DIEAlloc, dwarf::Tag(RefDie->getTag()));
   }
   NewRefDie = RefInfo.Clone;
 
@@ -1887,18 +1877,21 @@ unsigned DwarfLinker::cloneDieReferenceAttribute(
       uint32_t NewRefOffset =
           RefUnit->getStartOffset() + NewRefDie->getOffset();
       Attr = NewRefOffset;
+      Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+                   dwarf::DW_FORM_ref_addr, DIEInteger(Attr));
     } else {
       // A forward reference. Note and fixup later.
       Attr = 0xBADDEF;
-      Unit.noteForwardReference(NewRefDie, RefUnit, PatchLocation(Die));
+      Unit.noteForwardReference(
+          NewRefDie, RefUnit,
+          Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+                       dwarf::DW_FORM_ref_addr, DIEInteger(Attr)));
     }
-    Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_ref_addr,
-                 DIEInteger(Attr));
     return AttrSize;
   }
 
-  Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::Form(AttrSpec.Form),
-               DIEEntry(*NewRefDie));
+  Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+               dwarf::Form(AttrSpec.Form), DIEEntry(*NewRefDie));
   return AttrSize;
 }
 
@@ -1930,8 +1923,8 @@ unsigned DwarfLinker::cloneBlockAttribute(DIE &Die, AttributeSpec AttrSpec,
                      dwarf::Form(AttrSpec.Form), Block);
   ArrayRef<uint8_t> Bytes = *Val.getAsBlock();
   for (auto Byte : Bytes)
-    Attr->addValue(static_cast<dwarf::Attribute>(0), dwarf::DW_FORM_data1,
-                   DIEInteger(Byte));
+    Attr->addValue(DIEAlloc, static_cast<dwarf::Attribute>(0),
+                   dwarf::DW_FORM_data1, DIEInteger(Byte));
   // FIXME: If DIEBlock and DIELoc just reuses the Size field of
   // the DIE class, this if could be replaced by
   // Attr->setSize(Bytes.size()).
@@ -1941,7 +1934,7 @@ unsigned DwarfLinker::cloneBlockAttribute(DIE &Die, AttributeSpec AttrSpec,
     else
       Block->ComputeSize(&Streamer->getAsmPrinter());
   }
-  Die.addValue(Value);
+  Die.addValue(DIEAlloc, Value);
   return AttrSize;
 }
 
@@ -1975,7 +1968,7 @@ unsigned DwarfLinker::cloneAddressAttribute(DIE &Die, AttributeSpec AttrSpec,
       Addr = (Info.OrigHighPc ? Info.OrigHighPc : Addr) + Info.PCOffset;
   }
 
-  Die.addValue(static_cast<dwarf::Attribute>(AttrSpec.Attr),
+  Die.addValue(DIEAlloc, static_cast<dwarf::Attribute>(AttrSpec.Attr),
                static_cast<dwarf::Form>(AttrSpec.Form), DIEInteger(Addr));
   return Unit.getOrigUnit().getAddressByteSize();
 }
@@ -2004,20 +1997,20 @@ unsigned DwarfLinker::cloneScalarAttribute(
                   &Unit.getOrigUnit(), &InputDIE);
     return 0;
   }
-  DIEInteger Attr(Value);
+  PatchLocation Patch =
+      Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+                   dwarf::Form(AttrSpec.Form), DIEInteger(Value));
   if (AttrSpec.Attr == dwarf::DW_AT_ranges)
-    Unit.noteRangeAttribute(Die, PatchLocation(Die));
+    Unit.noteRangeAttribute(Die, Patch);
   // A more generic way to check for location attributes would be
   // nice, but it's very unlikely that any other attribute needs a
   // location list.
   else if (AttrSpec.Attr == dwarf::DW_AT_location ||
            AttrSpec.Attr == dwarf::DW_AT_frame_base)
-    Unit.noteLocationAttribute(PatchLocation(Die), Info.PCOffset);
+    Unit.noteLocationAttribute(Patch, Info.PCOffset);
   else if (AttrSpec.Attr == dwarf::DW_AT_declaration && Value)
     Info.IsDeclaration = true;
 
-  Die.addValue(dwarf::Attribute(AttrSpec.Attr), dwarf::Form(AttrSpec.Form),
-               Attr);
   return AttrSize;
 }
 
@@ -2170,7 +2163,7 @@ DIE *DwarfLinker::cloneDIE(const DWARFDebugInfoEntryMinimal &InputDIE,
   // (see cloneDieReferenceAttribute()).
   DIE *Die = Info.Clone;
   if (!Die)
-    Die = Info.Clone = new DIE(dwarf::Tag(InputDIE.getTag()));
+    Die = Info.Clone = DIE::get(DIEAlloc, dwarf::Tag(InputDIE.getTag()));
   assert(Die->getTag() == InputDIE.getTag());
   Die->setOffset(OutOffset);
 
@@ -2262,7 +2255,7 @@ DIE *DwarfLinker::cloneDIE(const DWARFDebugInfoEntryMinimal &InputDIE,
   for (auto *Child = InputDIE.getFirstChild(); Child && !Child->isNULL();
        Child = Child->getSibling()) {
     if (DIE *Clone = cloneDIE(*Child, Unit, PCOffset, OutOffset)) {
-      Die->addChild(std::unique_ptr<DIE>(Clone));
+      Die->addChild(Clone);
       OutOffset = Clone->getOffset() + Clone->getSize();
     }
   }
@@ -2364,6 +2357,16 @@ static void insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
   Seq.clear();
 }
 
+static void patchStmtList(DIE &Die, DIEInteger Offset) {
+  for (auto &V : Die.values())
+    if (V.getAttribute() == dwarf::DW_AT_stmt_list) {
+      V = DIEValue(V.getAttribute(), V.getForm(), Offset);
+      return;
+    }
+
+  llvm_unreachable("Didn't find DW_AT_stmt_list in cloned DIE!");
+}
+
 /// \brief Extract the line table for \p Unit from \p OrigDwarf, and
 /// recreate a relocated version of these for the address ranges that
 /// are present in the binary.
@@ -2376,18 +2379,8 @@ void DwarfLinker::patchLineTableForUnit(CompileUnit &Unit,
     return;
 
   // Update the cloned DW_AT_stmt_list with the correct debug_line offset.
-  if (auto *OutputDIE = Unit.getOutputUnitDIE()) {
-    auto Stmt =
-        std::find_if(OutputDIE->values_begin(), OutputDIE->values_end(),
-                     [](const DIEValue &Value) {
-                       return Value.getAttribute() == dwarf::DW_AT_stmt_list;
-                     });
-    assert(Stmt != OutputDIE->values_end() &&
-           "Didn't find DW_AT_stmt_list in cloned DIE!");
-    OutputDIE->setValue(Stmt - OutputDIE->values_begin(),
-                        DIEValue(Stmt->getAttribute(), Stmt->getForm(),
-                                 DIEInteger(Streamer->getLineSectionSize())));
-  }
+  if (auto *OutputDIE = Unit.getOutputUnitDIE())
+    patchStmtList(*OutputDIE, DIEInteger(Streamer->getLineSectionSize()));
 
   // Parse the original line info for the unit.
   DWARFDebugLine::LineTable LineTable;
diff --git a/tools/dsymutil/MachODebugMapParser.cpp b/tools/dsymutil/MachODebugMapParser.cpp
index b803e41..c58545a 100644
--- a/tools/dsymutil/MachODebugMapParser.cpp
+++ b/tools/dsymutil/MachODebugMapParser.cpp
@@ -160,7 +160,7 @@ void MachODebugMapParser::handleStabSymbolTableEntry(uint32_t StringIndex,
     // symbol table to find its address as it might not be in the
     // debug map (for common symbols).
     Value = getMainBinarySymbolAddress(Name);
-    if (Value == UnknownAddressOrSize)
+    if (Value == UnknownAddress)
       return;
     break;
   case MachO::N_FUN:
@@ -197,12 +197,14 @@ void MachODebugMapParser::loadCurrentObjectFileSymbols() {
   CurrentObjectAddresses.clear();
 
   for (auto Sym : CurrentObjectHolder.Get().symbols()) {
-    StringRef Name;
+
     uint64_t Addr;
-    if (Sym.getAddress(Addr) || Addr == UnknownAddressOrSize ||
-        Sym.getName(Name))
+    if (Sym.getAddress(Addr) || Addr == UnknownAddress)
+      continue;
+    ErrorOr<StringRef> Name = Sym.getName();
+    if (!Name)
       continue;
-    CurrentObjectAddresses[Name] = Addr;
+    CurrentObjectAddresses[*Name] = Addr;
   }
 }
 
@@ -212,7 +214,7 @@ void MachODebugMapParser::loadCurrentObjectFileSymbols() {
 uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) {
   auto Sym = MainBinarySymbolAddresses.find(Name);
   if (Sym == MainBinarySymbolAddresses.end())
-    return UnknownAddressOrSize;
+    return UnknownAddress;
   return Sym->second;
 }
 
@@ -222,21 +224,24 @@ void MachODebugMapParser::loadMainBinarySymbols() {
   const MachOObjectFile &MainBinary = MainBinaryHolder.GetAs<MachOObjectFile>();
   section_iterator Section = MainBinary.section_end();
   for (const auto &Sym : MainBinary.symbols()) {
-    SymbolRef::Type Type;
+    SymbolRef::Type Type = Sym.getType();
     // Skip undefined and STAB entries.
-    if (Sym.getType(Type) || (Type & SymbolRef::ST_Debug) ||
-        (Type & SymbolRef::ST_Unknown))
+    if ((Type & SymbolRef::ST_Debug) || (Type & SymbolRef::ST_Unknown))
       continue;
-    StringRef Name;
     uint64_t Addr;
     // The only symbols of interest are the global variables. These
     // are the only ones that need to be queried because the address
     // of common data won't be described in the debug map. All other
     // addresses should be fetched for the debug map.
-    if (Sym.getAddress(Addr) || Addr == UnknownAddressOrSize ||
+    if (Sym.getAddress(Addr) || Addr == UnknownAddress ||
         !(Sym.getFlags() & SymbolRef::SF_Global) || Sym.getSection(Section) ||
-        Section->isText() || Sym.getName(Name) || Name.size() == 0 ||
-        Name[0] == '\0')
+        Section->isText())
+      continue;
+    ErrorOr<StringRef> NameOrErr = Sym.getName();
+    if (!NameOrErr)
+      continue;
+    StringRef Name = *NameOrErr;
+    if (Name.size() == 0 || Name[0] == '\0')
       continue;
     MainBinarySymbolAddresses[Name] = Addr;
   }
diff --git a/tools/lli/OrcLazyJIT.h b/tools/lli/OrcLazyJIT.h
index 9257225..fe86adb 100644
--- a/tools/lli/OrcLazyJIT.h
+++ b/tools/lli/OrcLazyJIT.h
@@ -50,7 +50,6 @@ public:
   OrcLazyJIT(std::unique_ptr<TargetMachine> TM, LLVMContext &Context,
              CallbackManagerBuilder &BuildCallbackMgr)
     : TM(std::move(TM)),
-      Mang(this->TM->getDataLayout()),
       ObjectLayer(),
       CompileLayer(ObjectLayer, orc::SimpleCompiler(*this->TM)),
       IRDumpLayer(CompileLayer, createDebugDumper()),
@@ -137,7 +136,7 @@ private:
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name);
+      Mangler::getNameWithPrefix(MangledNameStream, Name, *TM->getDataLayout());
     }
     return MangledName;
   }
@@ -145,7 +144,6 @@ private:
   static TransformFtor createDebugDumper();
 
   std::unique_ptr<TargetMachine> TM;
-  Mangler Mang;
   SectionMemoryManager CCMgrMemMgr;
 
   ObjLayerT ObjectLayer;
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index 6782b9c..0fd2df4 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -724,7 +724,7 @@ int main(int argc, char **argv) {
   StringRef Stem = sys::path::stem(ToolName);
   if (Stem.find("ranlib") == StringRef::npos &&
       Stem.find("lib") != StringRef::npos)
-    return libDriverMain(argc, const_cast<const char **>(argv));
+    return libDriverMain(makeArrayRef(argv, argc));
 
   // Have the command line options parsed and handle things
   // like --help and --version.
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 58b02be..7672951 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -146,26 +146,30 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
 
   if (CurStreamType != LLVMIRBitstream) return nullptr;
 
+#define STRINGIFY_CODE(PREFIX, CODE)                                           \
+  case bitc::PREFIX##_##CODE:                                                  \
+    return #CODE;
   switch (BlockID) {
   default: return nullptr;
   case bitc::MODULE_BLOCK_ID:
     switch (CodeID) {
     default: return nullptr;
-    case bitc::MODULE_CODE_VERSION:     return "VERSION";
-    case bitc::MODULE_CODE_TRIPLE:      return "TRIPLE";
-    case bitc::MODULE_CODE_DATALAYOUT:  return "DATALAYOUT";
-    case bitc::MODULE_CODE_ASM:         return "ASM";
-    case bitc::MODULE_CODE_SECTIONNAME: return "SECTIONNAME";
-    case bitc::MODULE_CODE_DEPLIB:      return "DEPLIB"; // FIXME: Remove in 4.0
-    case bitc::MODULE_CODE_GLOBALVAR:   return "GLOBALVAR";
-    case bitc::MODULE_CODE_FUNCTION:    return "FUNCTION";
-    case bitc::MODULE_CODE_ALIAS:       return "ALIAS";
-    case bitc::MODULE_CODE_PURGEVALS:   return "PURGEVALS";
-    case bitc::MODULE_CODE_GCNAME:      return "GCNAME";
+      STRINGIFY_CODE(MODULE_CODE, VERSION)
+      STRINGIFY_CODE(MODULE_CODE, TRIPLE)
+      STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
+      STRINGIFY_CODE(MODULE_CODE, ASM)
+      STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
+      STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0
+      STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
+      STRINGIFY_CODE(MODULE_CODE, FUNCTION)
+      STRINGIFY_CODE(MODULE_CODE, ALIAS)
+      STRINGIFY_CODE(MODULE_CODE, PURGEVALS)
+      STRINGIFY_CODE(MODULE_CODE, GCNAME)
     }
   case bitc::PARAMATTR_BLOCK_ID:
     switch (CodeID) {
     default: return nullptr;
+    // FIXME: Should these be different?
     case bitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY";
     case bitc::PARAMATTR_CODE_ENTRY:     return "ENTRY";
     case bitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY";
@@ -173,112 +177,129 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
   case bitc::TYPE_BLOCK_ID_NEW:
     switch (CodeID) {
     default: return nullptr;
-    case bitc::TYPE_CODE_NUMENTRY:     return "NUMENTRY";
-    case bitc::TYPE_CODE_VOID:         return "VOID";
-    case bitc::TYPE_CODE_FLOAT:        return "FLOAT";
-    case bitc::TYPE_CODE_DOUBLE:       return "DOUBLE";
-    case bitc::TYPE_CODE_LABEL:        return "LABEL";
-    case bitc::TYPE_CODE_OPAQUE:       return "OPAQUE";
-    case bitc::TYPE_CODE_INTEGER:      return "INTEGER";
-    case bitc::TYPE_CODE_POINTER:      return "POINTER";
-    case bitc::TYPE_CODE_ARRAY:        return "ARRAY";
-    case bitc::TYPE_CODE_VECTOR:       return "VECTOR";
-    case bitc::TYPE_CODE_X86_FP80:     return "X86_FP80";
-    case bitc::TYPE_CODE_FP128:        return "FP128";
-    case bitc::TYPE_CODE_PPC_FP128:    return "PPC_FP128";
-    case bitc::TYPE_CODE_METADATA:     return "METADATA";
-    case bitc::TYPE_CODE_STRUCT_ANON:  return "STRUCT_ANON";
-    case bitc::TYPE_CODE_STRUCT_NAME:  return "STRUCT_NAME";
-    case bitc::TYPE_CODE_STRUCT_NAMED: return "STRUCT_NAMED";
-    case bitc::TYPE_CODE_FUNCTION:     return "FUNCTION";
+      STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
+      STRINGIFY_CODE(TYPE_CODE, VOID)
+      STRINGIFY_CODE(TYPE_CODE, FLOAT)
+      STRINGIFY_CODE(TYPE_CODE, DOUBLE)
+      STRINGIFY_CODE(TYPE_CODE, LABEL)
+      STRINGIFY_CODE(TYPE_CODE, OPAQUE)
+      STRINGIFY_CODE(TYPE_CODE, INTEGER)
+      STRINGIFY_CODE(TYPE_CODE, POINTER)
+      STRINGIFY_CODE(TYPE_CODE, ARRAY)
+      STRINGIFY_CODE(TYPE_CODE, VECTOR)
+      STRINGIFY_CODE(TYPE_CODE, X86_FP80)
+      STRINGIFY_CODE(TYPE_CODE, FP128)
+      STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
+      STRINGIFY_CODE(TYPE_CODE, METADATA)
+      STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
+      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
+      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
+      STRINGIFY_CODE(TYPE_CODE, FUNCTION)
     }
 
   case bitc::CONSTANTS_BLOCK_ID:
     switch (CodeID) {
     default: return nullptr;
-    case bitc::CST_CODE_SETTYPE:         return "SETTYPE";
-    case bitc::CST_CODE_NULL:            return "NULL";
-    case bitc::CST_CODE_UNDEF:           return "UNDEF";
-    case bitc::CST_CODE_INTEGER:         return "INTEGER";
-    case bitc::CST_CODE_WIDE_INTEGER:    return "WIDE_INTEGER";
-    case bitc::CST_CODE_FLOAT:           return "FLOAT";
-    case bitc::CST_CODE_AGGREGATE:       return "AGGREGATE";
-    case bitc::CST_CODE_STRING:          return "STRING";
-    case bitc::CST_CODE_CSTRING:         return "CSTRING";
-    case bitc::CST_CODE_CE_BINOP:        return "CE_BINOP";
-    case bitc::CST_CODE_CE_CAST:         return "CE_CAST";
-    case bitc::CST_CODE_CE_GEP:          return "CE_GEP";
-    case bitc::CST_CODE_CE_INBOUNDS_GEP: return "CE_INBOUNDS_GEP";
-    case bitc::CST_CODE_CE_SELECT:       return "CE_SELECT";
-    case bitc::CST_CODE_CE_EXTRACTELT:   return "CE_EXTRACTELT";
-    case bitc::CST_CODE_CE_INSERTELT:    return "CE_INSERTELT";
-    case bitc::CST_CODE_CE_SHUFFLEVEC:   return "CE_SHUFFLEVEC";
-    case bitc::CST_CODE_CE_CMP:          return "CE_CMP";
-    case bitc::CST_CODE_INLINEASM:       return "INLINEASM";
-    case bitc::CST_CODE_CE_SHUFVEC_EX:   return "CE_SHUFVEC_EX";
+      STRINGIFY_CODE(CST_CODE, SETTYPE)
+      STRINGIFY_CODE(CST_CODE, NULL)
+      STRINGIFY_CODE(CST_CODE, UNDEF)
+      STRINGIFY_CODE(CST_CODE, INTEGER)
+      STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
+      STRINGIFY_CODE(CST_CODE, FLOAT)
+      STRINGIFY_CODE(CST_CODE, AGGREGATE)
+      STRINGIFY_CODE(CST_CODE, STRING)
+      STRINGIFY_CODE(CST_CODE, CSTRING)
+      STRINGIFY_CODE(CST_CODE, CE_BINOP)
+      STRINGIFY_CODE(CST_CODE, CE_CAST)
+      STRINGIFY_CODE(CST_CODE, CE_GEP)
+      STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
+      STRINGIFY_CODE(CST_CODE, CE_SELECT)
+      STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
+      STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
+      STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
+      STRINGIFY_CODE(CST_CODE, CE_CMP)
+      STRINGIFY_CODE(CST_CODE, INLINEASM)
+      STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
     case bitc::CST_CODE_BLOCKADDRESS:    return "CST_CODE_BLOCKADDRESS";
-    case bitc::CST_CODE_DATA:            return "DATA";
+      STRINGIFY_CODE(CST_CODE, DATA)
     }
   case bitc::FUNCTION_BLOCK_ID:
     switch (CodeID) {
     default: return nullptr;
-    case bitc::FUNC_CODE_DECLAREBLOCKS: return "DECLAREBLOCKS";
-
-    case bitc::FUNC_CODE_INST_BINOP:        return "INST_BINOP";
-    case bitc::FUNC_CODE_INST_CAST:         return "INST_CAST";
-    case bitc::FUNC_CODE_INST_GEP_OLD:
-      return "INST_GEP_OLD";
-    case bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD:
-      return "INST_INBOUNDS_GEP_OLD";
-    case bitc::FUNC_CODE_INST_SELECT:       return "INST_SELECT";
-    case bitc::FUNC_CODE_INST_EXTRACTELT:   return "INST_EXTRACTELT";
-    case bitc::FUNC_CODE_INST_INSERTELT:    return "INST_INSERTELT";
-    case bitc::FUNC_CODE_INST_SHUFFLEVEC:   return "INST_SHUFFLEVEC";
-    case bitc::FUNC_CODE_INST_CMP:          return "INST_CMP";
-
-    case bitc::FUNC_CODE_INST_RET:          return "INST_RET";
-    case bitc::FUNC_CODE_INST_BR:           return "INST_BR";
-    case bitc::FUNC_CODE_INST_SWITCH:       return "INST_SWITCH";
-    case bitc::FUNC_CODE_INST_INVOKE:       return "INST_INVOKE";
-    case bitc::FUNC_CODE_INST_UNREACHABLE:  return "INST_UNREACHABLE";
-
-    case bitc::FUNC_CODE_INST_PHI:          return "INST_PHI";
-    case bitc::FUNC_CODE_INST_ALLOCA:       return "INST_ALLOCA";
-    case bitc::FUNC_CODE_INST_LOAD:         return "INST_LOAD";
-    case bitc::FUNC_CODE_INST_VAARG:        return "INST_VAARG";
-    case bitc::FUNC_CODE_INST_STORE:        return "INST_STORE";
-    case bitc::FUNC_CODE_INST_EXTRACTVAL:   return "INST_EXTRACTVAL";
-    case bitc::FUNC_CODE_INST_INSERTVAL:    return "INST_INSERTVAL";
-    case bitc::FUNC_CODE_INST_CMP2:         return "INST_CMP2";
-    case bitc::FUNC_CODE_INST_VSELECT:      return "INST_VSELECT";
-    case bitc::FUNC_CODE_DEBUG_LOC_AGAIN:   return "DEBUG_LOC_AGAIN";
-    case bitc::FUNC_CODE_INST_CALL:         return "INST_CALL";
-    case bitc::FUNC_CODE_DEBUG_LOC:         return "DEBUG_LOC";
-    case bitc::FUNC_CODE_INST_GEP:
-      return "INST_GEP";
+      STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
+      STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
+      STRINGIFY_CODE(FUNC_CODE, INST_CAST)
+      STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
+      STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
+      STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
+      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
+      STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
+      STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
+      STRINGIFY_CODE(FUNC_CODE, INST_CMP)
+      STRINGIFY_CODE(FUNC_CODE, INST_RET)
+      STRINGIFY_CODE(FUNC_CODE, INST_BR)
+      STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
+      STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
+      STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
+      STRINGIFY_CODE(FUNC_CODE, INST_PHI)
+      STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
+      STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
+      STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
+      STRINGIFY_CODE(FUNC_CODE, INST_STORE)
+      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
+      STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
+      STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
+      STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
+      STRINGIFY_CODE(FUNC_CODE, INST_CALL)
+      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
+      STRINGIFY_CODE(FUNC_CODE, INST_GEP)
     }
   case bitc::VALUE_SYMTAB_BLOCK_ID:
     switch (CodeID) {
     default: return nullptr;
-    case bitc::VST_CODE_ENTRY: return "ENTRY";
-    case bitc::VST_CODE_BBENTRY: return "BBENTRY";
+    STRINGIFY_CODE(VST_CODE, ENTRY)
+    STRINGIFY_CODE(VST_CODE, BBENTRY)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch(CodeID) {
     default:return nullptr;
-    case bitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT";
+      STRINGIFY_CODE(METADATA, ATTACHMENT)
     }
   case bitc::METADATA_BLOCK_ID:
     switch(CodeID) {
     default:return nullptr;
-    case bitc::METADATA_STRING:      return "METADATA_STRING";
-    case bitc::METADATA_NAME:        return "METADATA_NAME";
-    case bitc::METADATA_KIND:        return "METADATA_KIND";
-    case bitc::METADATA_NODE:        return "METADATA_NODE";
-    case bitc::METADATA_VALUE:       return "METADATA_VALUE";
-    case bitc::METADATA_OLD_NODE:    return "METADATA_OLD_NODE";
-    case bitc::METADATA_OLD_FN_NODE: return "METADATA_OLD_FN_NODE";
-    case bitc::METADATA_NAMED_NODE:  return "METADATA_NAMED_NODE";
+      STRINGIFY_CODE(METADATA, STRING)
+      STRINGIFY_CODE(METADATA, NAME)
+      STRINGIFY_CODE(METADATA, KIND)
+      STRINGIFY_CODE(METADATA, NODE)
+      STRINGIFY_CODE(METADATA, VALUE)
+      STRINGIFY_CODE(METADATA, OLD_NODE)
+      STRINGIFY_CODE(METADATA, OLD_FN_NODE)
+      STRINGIFY_CODE(METADATA, NAMED_NODE)
+      STRINGIFY_CODE(METADATA, DISTINCT_NODE)
+      STRINGIFY_CODE(METADATA, LOCATION)
+      STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
+      STRINGIFY_CODE(METADATA, SUBRANGE)
+      STRINGIFY_CODE(METADATA, ENUMERATOR)
+      STRINGIFY_CODE(METADATA, BASIC_TYPE)
+      STRINGIFY_CODE(METADATA, FILE)
+      STRINGIFY_CODE(METADATA, DERIVED_TYPE)
+      STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
+      STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
+      STRINGIFY_CODE(METADATA, COMPILE_UNIT)
+      STRINGIFY_CODE(METADATA, SUBPROGRAM)
+      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
+      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
+      STRINGIFY_CODE(METADATA, NAMESPACE)
+      STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
+      STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
+      STRINGIFY_CODE(METADATA, GLOBAL_VAR)
+      STRINGIFY_CODE(METADATA, LOCAL_VAR)
+      STRINGIFY_CODE(METADATA, EXPRESSION)
+      STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
+      STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
+      STRINGIFY_CODE(METADATA, MODULE)
     }
   case bitc::USELIST_BLOCK_ID:
     switch(CodeID) {
@@ -287,6 +308,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
     case bitc::USELIST_CODE_BB:      return "USELIST_CODE_BB";
     }
   }
+#undef STRINGIFY_CODE
 }
 
 struct PerRecordStats {
diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp
index 4ff5330..8dc4d66 100644
--- a/tools/llvm-cov/CodeCoverage.cpp
+++ b/tools/llvm-cov/CodeCoverage.cpp
@@ -89,7 +89,7 @@ public:
       LoadedSourceFiles;
   bool CompareFilenamesOnly;
   StringMap<std::string> RemappedFilenames;
-  llvm::Triple::ArchType CoverageArch;
+  std::string CoverageArch;
 };
 }
 
@@ -349,15 +349,12 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
       Filters.push_back(std::unique_ptr<CoverageFilter>(StatFilterer));
     }
 
-    if (Arch.empty())
-      CoverageArch = llvm::Triple::ArchType::UnknownArch;
-    else {
-      CoverageArch = Triple(Arch).getArch();
-      if (CoverageArch == llvm::Triple::ArchType::UnknownArch) {
-        errs() << "error: Unknown architecture: " << Arch << "\n";
-        return 1;
-      }
+    if (!Arch.empty() &&
+        Triple(Arch).getArch() == llvm::Triple::ArchType::UnknownArch) {
+      errs() << "error: Unknown architecture: " << Arch << "\n";
+      return 1;
     }
+    CoverageArch = Arch;
 
     for (const auto &File : InputSourceFiles) {
       SmallString<128> Path(File);
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp
index ef42211..c627a66 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/FileSystem.h"
@@ -96,14 +97,12 @@ static bool collectRelocatedSymbols(const ObjectFile *Obj,
       const object::symbol_iterator RelocSymI = Reloc.getSymbol();
       if (RelocSymI == Obj->symbol_end())
         continue;
-      StringRef RelocSymName;
-      if (error(RelocSymI->getName(RelocSymName)))
-        return true;
-      uint64_t Offset;
-      if (error(Reloc.getOffset(Offset)))
+      ErrorOr<StringRef> RelocSymName = RelocSymI->getName();
+      if (error(RelocSymName.getError()))
         return true;
+      uint64_t Offset = Reloc.getOffset();
       if (Offset >= SymOffset && Offset < SymEnd) {
-        *I = RelocSymName;
+        *I = *RelocSymName;
         ++I;
       }
     }
@@ -122,14 +121,12 @@ static bool collectRelocationOffsets(
       const object::symbol_iterator RelocSymI = Reloc.getSymbol();
       if (RelocSymI == Obj->symbol_end())
         continue;
-      StringRef RelocSymName;
-      if (error(RelocSymI->getName(RelocSymName)))
-        return true;
-      uint64_t Offset;
-      if (error(Reloc.getOffset(Offset)))
+      ErrorOr<StringRef> RelocSymName = RelocSymI->getName();
+      if (error(RelocSymName.getError()))
         return true;
+      uint64_t Offset = Reloc.getOffset();
       if (Offset >= SymOffset && Offset < SymEnd)
-        Collection[std::make_pair(SymName, Offset - SymOffset)] = RelocSymName;
+        Collection[std::make_pair(SymName, Offset - SymOffset)] = *RelocSymName;
     }
   }
   return false;
@@ -187,10 +184,16 @@ static void dumpCXXData(const ObjectFile *Obj) {
 
   uint8_t BytesInAddress = Obj->getBytesInAddress();
 
-  for (const object::SymbolRef &Sym : Obj->symbols()) {
-    StringRef SymName;
-    if (error(Sym.getName(SymName)))
+  std::vector<std::pair<SymbolRef, uint64_t>> SymAddr =
+      object::computeSymbolSizes(*Obj);
+
+  for (auto &P : SymAddr) {
+    object::SymbolRef Sym = P.first;
+    uint64_t SymSize = P.second;
+    ErrorOr<StringRef> SymNameOrErr = Sym.getName();
+    if (error(SymNameOrErr.getError()))
       return;
+    StringRef SymName = *SymNameOrErr;
     object::section_iterator SecI(Obj->section_begin());
     if (error(Sym.getSection(SecI)))
       return;
@@ -207,7 +210,6 @@ static void dumpCXXData(const ObjectFile *Obj) {
     uint64_t SymAddress;
     if (error(Sym.getAddress(SymAddress)))
       return;
-    uint64_t SymSize = Sym.getSize();
     uint64_t SecAddress = Sec.getAddress();
     uint64_t SecSize = Sec.getSize();
     uint64_t SymOffset = SymAddress - SecAddress;
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index c1cb021..db3fcf6 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -69,22 +69,27 @@ DumpType("debug-dump", cl::init(DIDT_All),
         clEnumValN(DIDT_StrOffsetsDwo, "str_offsets.dwo", ".debug_str_offsets.dwo"),
         clEnumValEnd));
 
+static int ReturnValue = EXIT_SUCCESS;
+
+static bool error(StringRef Filename, std::error_code EC) {
+  if (!EC)
+    return false;
+  errs() << Filename << ": " << EC.message() << "\n";
+  ReturnValue = EXIT_FAILURE;
+  return true;
+}
+
 static void DumpInput(StringRef Filename) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
       MemoryBuffer::getFileOrSTDIN(Filename);
-
-  if (std::error_code EC = BuffOrErr.getError()) {
-    errs() << Filename << ": " << EC.message() << "\n";
+  if (error(Filename, BuffOrErr.getError()))
     return;
-  }
   std::unique_ptr<MemoryBuffer> Buff = std::move(BuffOrErr.get());
 
   ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
       ObjectFile::createObjectFile(Buff->getMemBufferRef());
-  if (std::error_code EC = ObjOrErr.getError()) {
-    errs() << Filename << ": " << EC.message() << '\n';
+  if (error(Filename, ObjOrErr.getError()))
     return;
-  }
   ObjectFile &Obj = *ObjOrErr.get();
 
   std::unique_ptr<DIContext> DICtx(new DWARFContextInMemory(Obj));
@@ -109,5 +114,5 @@ int main(int argc, char **argv) {
 
   std::for_each(InputFilenames.begin(), InputFilenames.end(), DumpInput);
 
-  return 0;
+  return ReturnValue;
 }
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 8013f58..c88f373 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -250,15 +250,7 @@ static char isSymbolList64Bit(SymbolicFile &Obj) {
     return false;
   if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
     return MachO->is64Bit();
-  if (isa<ELF32LEObjectFile>(Obj))
-    return false;
-  if (isa<ELF64LEObjectFile>(Obj))
-    return true;
-  if (isa<ELF32BEObjectFile>(Obj))
-    return false;
-  if (isa<ELF64BEObjectFile>(Obj))
-    return true;
-  return false;
+  return cast<ELFObjectFileBase>(Obj).getBytesInAddress() == 8;
 }
 
 static StringRef CurrentFilename;
@@ -569,7 +561,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
       continue;
     if ((I->TypeChar == 'U') && DefinedOnly)
       continue;
-    if (SizeSort && !PrintAddress && I->Size == UnknownAddressOrSize)
+    if (SizeSort && !PrintAddress)
       continue;
     if (PrintFileName) {
       if (!ArchitectureName.empty())
@@ -586,16 +578,15 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
     char SymbolAddrStr[18] = "";
     char SymbolSizeStr[18] = "";
 
-    if (OutputFormat == sysv || I->Address == UnknownAddressOrSize)
+    if (OutputFormat == sysv || I->Address == UnknownAddress)
       strcpy(SymbolAddrStr, printBlanks);
     if (OutputFormat == sysv)
       strcpy(SymbolSizeStr, printBlanks);
 
-    if (I->Address != UnknownAddressOrSize)
+    if (I->Address != UnknownAddress)
       format(printFormat, I->Address)
           .print(SymbolAddrStr, sizeof(SymbolAddrStr));
-    if (I->Size != UnknownAddressOrSize)
-      format(printFormat, I->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
+    format(printFormat, I->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
 
     // If OutputFormat is darwin or we are printing Mach-O symbols in hex and
     // we have a MachOObjectFile, call darwinPrintSymbol to print as darwin's
@@ -613,8 +604,7 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
         outs() << SymbolAddrStr << ' ';
       if (PrintSize) {
         outs() << SymbolSizeStr;
-        if (I->Size != UnknownAddressOrSize)
-          outs() << ' ';
+        outs() << ' ';
       }
       outs() << I->TypeChar;
       if (I->TypeChar == '-' && MachO)
@@ -632,25 +622,20 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
   SymbolList.clear();
 }
 
-template <class ELFT>
-static char getSymbolNMTypeChar(ELFObjectFile<ELFT> &Obj,
+static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
                                 basic_symbol_iterator I) {
-  typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
-  typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
-
   // OK, this is ELF
-  symbol_iterator SymI(I);
+  elf_symbol_iterator SymI(I);
 
-  DataRefImpl Symb = I->getRawDataRefImpl();
-  const Elf_Sym *ESym = Obj.getSymbol(Symb);
-  const ELFFile<ELFT> &EF = *Obj.getELFFile();
-  const Elf_Shdr *ESec = EF.getSection(ESym);
+  elf_section_iterator SecI = Obj.section_end();
+  if (error(SymI->getSection(SecI)))
+    return '?';
 
-  if (ESec) {
-    switch (ESec->sh_type) {
+  if (SecI != Obj.section_end()) {
+    switch (SecI->getType()) {
     case ELF::SHT_PROGBITS:
     case ELF::SHT_DYNAMIC:
-      switch (ESec->sh_flags) {
+      switch (SecI->getFlags()) {
       case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
         return 't';
       case (ELF::SHF_TLS | ELF::SHF_ALLOC | ELF::SHF_WRITE):
@@ -667,17 +652,17 @@ static char getSymbolNMTypeChar(ELFObjectFile<ELFT> &Obj,
     }
   }
 
-  if (ESym->getType() == ELF::STT_SECTION) {
-    StringRef Name;
-    if (error(SymI->getName(Name)))
+  if (SymI->getELFType() == ELF::STT_SECTION) {
+    ErrorOr<StringRef> Name = SymI->getName();
+    if (error(Name.getError()))
       return '?';
-    return StringSwitch<char>(Name)
+    return StringSwitch<char>(*Name)
         .StartsWith(".debug", 'N')
         .StartsWith(".note", 'n')
         .Default('?');
   }
 
-  return '?';
+  return 'n';
 }
 
 static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
@@ -685,11 +670,11 @@ static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
   // OK, this is COFF.
   symbol_iterator SymI(I);
 
-  StringRef Name;
-  if (error(SymI->getName(Name)))
+  ErrorOr<StringRef> Name = SymI->getName();
+  if (error(Name.getError()))
     return '?';
 
-  char Ret = StringSwitch<char>(Name)
+  char Ret = StringSwitch<char>(*Name)
                  .StartsWith(".debug", 'N')
                  .StartsWith(".sxdata", 'N')
                  .Default('?');
@@ -784,26 +769,12 @@ static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I) {
   return getSymbolNMTypeChar(*GV);
 }
 
-template <class ELFT>
-static bool isELFObject(ELFObjectFile<ELFT> &Obj, symbol_iterator I) {
-  typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
-
-  DataRefImpl Symb = I->getRawDataRefImpl();
-  const Elf_Sym *ESym = Obj.getSymbol(Symb);
-
-  return ESym->getType() == ELF::STT_OBJECT;
-}
-
 static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
-  if (ELF32LEObjectFile *ELF = dyn_cast<ELF32LEObjectFile>(&Obj))
-    return isELFObject(*ELF, I);
-  if (ELF64LEObjectFile *ELF = dyn_cast<ELF64LEObjectFile>(&Obj))
-    return isELFObject(*ELF, I);
-  if (ELF32BEObjectFile *ELF = dyn_cast<ELF32BEObjectFile>(&Obj))
-    return isELFObject(*ELF, I);
-  if (ELF64BEObjectFile *ELF = dyn_cast<ELF64BEObjectFile>(&Obj))
-    return isELFObject(*ELF, I);
-  return false;
+  auto *ELF = dyn_cast<ELFObjectFileBase>(&Obj);
+  if (!ELF)
+    return false;
+
+  return elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
 }
 
 static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
@@ -830,14 +801,8 @@ static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
     Ret = getSymbolNMTypeChar(*COFF, I);
   else if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
     Ret = getSymbolNMTypeChar(*MachO, I);
-  else if (ELF32LEObjectFile *ELF = dyn_cast<ELF32LEObjectFile>(&Obj))
-    Ret = getSymbolNMTypeChar(*ELF, I);
-  else if (ELF64LEObjectFile *ELF = dyn_cast<ELF64LEObjectFile>(&Obj))
-    Ret = getSymbolNMTypeChar(*ELF, I);
-  else if (ELF32BEObjectFile *ELF = dyn_cast<ELF32BEObjectFile>(&Obj))
-    Ret = getSymbolNMTypeChar(*ELF, I);
   else
-    Ret = getSymbolNMTypeChar(cast<ELF64BEObjectFile>(Obj), I);
+    Ret = getSymbolNMTypeChar(cast<ELFObjectFileBase>(Obj), I);
 
   if (Symflags & object::SymbolRef::SF_Global)
     Ret = toupper(Ret);
@@ -871,8 +836,8 @@ static unsigned getNsectForSegSect(MachOObjectFile *Obj) {
 // It is called once for each symbol in a Mach-O file from
 // dumpSymbolNamesFromObject() and returns the section number for that symbol
 // if it is in a section, else it returns 0.
-static unsigned getNsectInMachO(MachOObjectFile &Obj, basic_symbol_iterator I) {
-  DataRefImpl Symb = I->getRawDataRefImpl();
+static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) {
+  DataRefImpl Symb = Sym.getRawDataRefImpl();
   if (Obj.is64Bit()) {
     MachO::nlist_64 STE = Obj.getSymbol64TableEntry(Symb);
     if ((STE.n_type & MachO::N_TYPE) == MachO::N_SECT)
@@ -889,17 +854,16 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
                                       std::string ArchiveName = std::string(),
                                       std::string ArchitectureName =
                                         std::string()) {
-  basic_symbol_iterator IBegin = Obj.symbol_begin();
-  basic_symbol_iterator IEnd = Obj.symbol_end();
+  auto Symbols = Obj.symbols();
   if (DynamicSyms) {
-    if (!Obj.isELF()) {
+    const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
+    if (!E) {
       error("File format has no dynamic symbol table", Obj.getFileName());
       return;
     }
-    std::pair<symbol_iterator, symbol_iterator> IDyn =
-        getELFDynamicSymbolIterators(&Obj);
-    IBegin = IDyn.first;
-    IEnd = IDyn.second;
+    auto DynSymbols = E->getDynamicSymbolIterators();
+    Symbols =
+        make_range<basic_symbol_iterator>(DynSymbols.begin(), DynSymbols.end());
   }
   std::string NameBuffer;
   raw_string_ostream OS(NameBuffer);
@@ -913,13 +877,13 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
     if (Nsect == 0)
       return;
   }
-  for (basic_symbol_iterator I = IBegin; I != IEnd; ++I) {
-    uint32_t SymFlags = I->getFlags();
+  for (BasicSymbolRef Sym : Symbols) {
+    uint32_t SymFlags = Sym.getFlags();
     if (!DebugSyms && (SymFlags & SymbolRef::SF_FormatSpecific))
       continue;
     if (WithoutAliases) {
       if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj)) {
-        const GlobalValue *GV = IR->getSymbolGV(I->getRawDataRefImpl());
+        const GlobalValue *GV = IR->getSymbolGV(Sym.getRawDataRefImpl());
         if (GV && isa<GlobalAlias>(GV))
           continue;
       }
@@ -927,23 +891,24 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
     // If a "-s segname sectname" option was specified and this is a Mach-O
     // file and this section appears in this file, Nsect will be non-zero then
     // see if this symbol is a symbol from that section and if not skip it.
-    if (Nsect && Nsect != getNsectInMachO(*MachO, I))
+    if (Nsect && Nsect != getNsectInMachO(*MachO, Sym))
       continue;
     NMSymbol S;
-    S.Size = UnknownAddressOrSize;
-    S.Address = UnknownAddressOrSize;
-    if (PrintSize && isa<ELFObjectFileBase>(Obj)) {
-      symbol_iterator SymI = I;
-      S.Size = SymI->getSize();
+    S.Size = 0;
+    S.Address = UnknownAddress;
+    if (PrintSize) {
+      if (isa<ELFObjectFileBase>(&Obj))
+        S.Size = ELFSymbolRef(Sym).getSize();
     }
-    if (PrintAddress && isa<ObjectFile>(Obj))
-      if (error(symbol_iterator(I)->getAddress(S.Address)))
+    if (PrintAddress && isa<ObjectFile>(Obj)) {
+      if (error(SymbolRef(Sym).getAddress(S.Address)))
         break;
-    S.TypeChar = getNMTypeChar(Obj, I);
-    if (error(I->printName(OS)))
+    }
+    S.TypeChar = getNMTypeChar(Obj, Sym);
+    if (error(Sym.printName(OS)))
       break;
     OS << '\0';
-    S.Symb = I->getRawDataRefImpl();
+    S.Symb = Sym.getRawDataRefImpl();
     SymbolList.push_back(S);
   }
 
diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt
index d717653..1f2721a 100644
--- a/tools/llvm-objdump/CMakeLists.txt
+++ b/tools/llvm-objdump/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
+  CodeGen
   DebugInfoDWARF
   MC
   MCDisassembler
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
index 976a921..58bdddf 100644
--- a/tools/llvm-objdump/COFFDump.cpp
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -177,9 +177,7 @@ static std::error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
   for (std::vector<RelocationRef>::const_iterator I = Rels.begin(),
                                                   E = Rels.end();
                                                   I != E; ++I) {
-    uint64_t Ofs;
-    if (std::error_code EC = I->getOffset(Ofs))
-      return EC;
+    uint64_t Ofs = I->getOffset();
     if (Ofs == Offset) {
       Sym = *I->getSymbol();
       return std::error_code();
@@ -215,8 +213,10 @@ static std::error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
   SymbolRef Sym;
   if (std::error_code EC = resolveSymbol(Rels, Offset, Sym))
     return EC;
-  if (std::error_code EC = Sym.getName(Name))
+  ErrorOr<StringRef> NameOrErr = Sym.getName();
+  if (std::error_code EC = NameOrErr.getError())
     return EC;
+  Name = *NameOrErr;
   return std::error_code();
 }
 
diff --git a/tools/llvm-objdump/ELFDump.cpp b/tools/llvm-objdump/ELFDump.cpp
index 9c091a4..2d0d7d7 100644
--- a/tools/llvm-objdump/ELFDump.cpp
+++ b/tools/llvm-objdump/ELFDump.cpp
@@ -24,9 +24,9 @@ using namespace llvm::object;
 template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
   typedef ELFFile<ELFT> ELFO;
   outs() << "Program Header:\n";
-  for (typename ELFO::Elf_Phdr_Iter pi = o->begin_program_headers(),
-                                    pe = o->end_program_headers();
-                                    pi != pe; ++pi) {
+  for (typename ELFO::Elf_Phdr_Iter pi = o->program_header_begin(),
+                                    pe = o->program_header_end();
+       pi != pe; ++pi) {
     switch (pi->p_type) {
     case ELF::PT_LOAD:
       outs() << "    LOAD ";
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 1730bf3..5263c33 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -178,9 +178,8 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
 
 struct SymbolSorter {
   bool operator()(const SymbolRef &A, const SymbolRef &B) {
-    SymbolRef::Type AType, BType;
-    A.getType(AType);
-    B.getType(BType);
+    SymbolRef::Type AType = A.getType();
+    SymbolRef::Type BType = B.getType();
 
     uint64_t AAddr, BAddr;
     if (AType != SymbolRef::ST_Function)
@@ -283,9 +282,10 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
                                   SmallVectorImpl<uint64_t> &FoundFns,
                                   uint64_t &BaseSegmentAddress) {
   for (const SymbolRef &Symbol : MachOObj->symbols()) {
-    StringRef SymName;
-    Symbol.getName(SymName);
-    if (!SymName.startswith("ltmp"))
+    ErrorOr<StringRef> SymName = Symbol.getName();
+    if (std::error_code EC = SymName.getError())
+      report_fatal_error(EC.message());
+    if (!SymName->startswith("ltmp"))
       Symbols.push_back(Symbol);
   }
 
@@ -362,9 +362,10 @@ static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
       if (indirect_symbol < Symtab.nsyms) {
         symbol_iterator Sym = O->getSymbolByIndex(indirect_symbol);
         SymbolRef Symbol = *Sym;
-        StringRef SymName;
-        Symbol.getName(SymName);
-        outs() << SymName;
+        ErrorOr<StringRef> SymName = Symbol.getName();
+        if (std::error_code EC = SymName.getError())
+          report_fatal_error(EC.message());
+        outs() << *SymName;
       } else {
         outs() << "?";
       }
@@ -588,14 +589,15 @@ static void CreateSymbolAddressMap(MachOObjectFile *O,
                                    SymbolAddressMap *AddrMap) {
   // Create a map of symbol addresses to symbol names.
   for (const SymbolRef &Symbol : O->symbols()) {
-    SymbolRef::Type ST;
-    Symbol.getType(ST);
+    SymbolRef::Type ST = Symbol.getType();
     if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
         ST == SymbolRef::ST_Other) {
       uint64_t Address;
       Symbol.getAddress(Address);
-      StringRef SymName;
-      Symbol.getName(SymName);
+      ErrorOr<StringRef> SymNameOrErr = Symbol.getName();
+      if (std::error_code EC = SymNameOrErr.getError())
+        report_fatal_error(EC.message());
+      StringRef SymName = *SymNameOrErr;
       if (!SymName.startswith(".objc"))
         (*AddrMap)[Address] = SymName;
     }
@@ -798,8 +800,7 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
     RE = O->getRelocation(Rel);
     isExtern = O->getPlainRelocationExternal(RE);
     if (isExtern) {
-      uint64_t RelocOffset;
-      Reloc.getOffset(RelocOffset);
+      uint64_t RelocOffset = Reloc.getOffset();
       symbol_iterator RelocSym = Reloc.getSymbol();
       Relocs.push_back(std::make_pair(RelocOffset, *RelocSym));
     }
@@ -833,9 +834,10 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
         [&](const std::pair<uint64_t, SymbolRef> &P) { return P.first == i; });
     if (Reloc != Relocs.end()) {
       symbol_iterator RelocSym = Reloc->second;
-      StringRef SymName;
-      RelocSym->getName(SymName);
-      outs() << "external relocation entry for symbol:" << SymName << "\n";
+      ErrorOr<StringRef> SymName = RelocSym->getName();
+      if (std::error_code EC = SymName.getError())
+        report_fatal_error(EC.message());
+      outs() << "external relocation entry for symbol:" << *SymName << "\n";
       continue;
     }
 
@@ -1765,8 +1767,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     bool r_scattered = false;
     uint32_t r_value, pair_r_value, r_type;
     for (const RelocationRef &Reloc : info->S.relocations()) {
-      uint64_t RelocOffset;
-      Reloc.getOffset(RelocOffset);
+      uint64_t RelocOffset = Reloc.getOffset();
       if (RelocOffset == sect_offset) {
         Rel = Reloc.getRawDataRefImpl();
         RE = info->O->getRelocation(Rel);
@@ -1797,9 +1798,10 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
       }
     }
     if (reloc_found && isExtern) {
-      StringRef SymName;
-      Symbol.getName(SymName);
-      const char *name = SymName.data();
+      ErrorOr<StringRef> SymName = Symbol.getName();
+      if (std::error_code EC = SymName.getError())
+        report_fatal_error(EC.message());
+      const char *name = SymName->data();
       op_info->AddSymbol.Present = 1;
       op_info->AddSymbol.Name = name;
       // For i386 extern relocation entries the value in the instruction is
@@ -1843,8 +1845,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     bool isExtern = false;
     SymbolRef Symbol;
     for (const RelocationRef &Reloc : info->S.relocations()) {
-      uint64_t RelocOffset;
-      Reloc.getOffset(RelocOffset);
+      uint64_t RelocOffset = Reloc.getOffset();
       if (RelocOffset == sect_offset) {
         Rel = Reloc.getRawDataRefImpl();
         RE = info->O->getRelocation(Rel);
@@ -1864,9 +1865,10 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
       // is the offset from the external symbol.
       if (info->O->getAnyRelocationPCRel(RE))
         op_info->Value -= Pc + Offset + Size;
-      StringRef SymName;
-      Symbol.getName(SymName);
-      const char *name = SymName.data();
+      ErrorOr<StringRef> SymName = Symbol.getName();
+      if (std::error_code EC = SymName.getError())
+        report_fatal_error(EC.message());
+      const char *name = SymName->data();
       unsigned Type = info->O->getAnyRelocationType(RE);
       if (Type == MachO::X86_64_RELOC_SUBTRACTOR) {
         DataRefImpl RelNext = Rel;
@@ -1880,9 +1882,10 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
           op_info->SubtractSymbol.Name = name;
           symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum);
           Symbol = *RelocSymNext;
-          StringRef SymNameNext;
-          Symbol.getName(SymNameNext);
-          name = SymNameNext.data();
+          ErrorOr<StringRef> SymNameNext = Symbol.getName();
+          if (std::error_code EC = SymNameNext.getError())
+            report_fatal_error(EC.message());
+          name = SymNameNext->data();
         }
       }
       // TODO: add the VariantKinds to op_info->VariantKind for relocation types
@@ -1913,8 +1916,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     auto Reloc =
         std::find_if(info->S.relocations().begin(), info->S.relocations().end(),
                      [&](const RelocationRef &Reloc) {
-                       uint64_t RelocOffset;
-                       Reloc.getOffset(RelocOffset);
+                       uint64_t RelocOffset = Reloc.getOffset();
                        return RelocOffset == sect_offset;
                      });
 
@@ -1950,9 +1952,10 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     }
 
     if (isExtern) {
-      StringRef SymName;
-      Symbol.getName(SymName);
-      const char *name = SymName.data();
+      ErrorOr<StringRef> SymName = Symbol.getName();
+      if (std::error_code EC = SymName.getError())
+        report_fatal_error(EC.message());
+      const char *name = SymName->data();
       op_info->AddSymbol.Present = 1;
       op_info->AddSymbol.Name = name;
       switch (r_type) {
@@ -2040,8 +2043,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     auto Reloc =
         std::find_if(info->S.relocations().begin(), info->S.relocations().end(),
                      [&](const RelocationRef &Reloc) {
-                       uint64_t RelocOffset;
-                       Reloc.getOffset(RelocOffset);
+                       uint64_t RelocOffset = Reloc.getOffset();
                        return RelocOffset == sect_offset;
                      });
 
@@ -2063,9 +2065,10 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     // NOTE: Scattered relocations don't exist on arm64.
     if (!info->O->getPlainRelocationExternal(RE))
       return 0;
-    StringRef SymName;
-    Reloc->getSymbol()->getName(SymName);
-    const char *name = SymName.data();
+    ErrorOr<StringRef> SymName = Reloc->getSymbol()->getName();
+    if (std::error_code EC = SymName.getError())
+      report_fatal_error(EC.message());
+    const char *name = SymName->data();
     op_info->AddSymbol.Present = 1;
     op_info->AddSymbol.Name = name;
 
@@ -2192,9 +2195,10 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
             if (indirect_symbol < Symtab.nsyms) {
               symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
               SymbolRef Symbol = *Sym;
-              StringRef SymName;
-              Symbol.getName(SymName);
-              const char *name = SymName.data();
+              ErrorOr<StringRef> SymName = Symbol.getName();
+              if (std::error_code EC = SymName.getError())
+                report_fatal_error(EC.message());
+              const char *name = SymName->data();
               return name;
             }
           }
@@ -2226,9 +2230,10 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
             if (indirect_symbol < Symtab.nsyms) {
               symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
               SymbolRef Symbol = *Sym;
-              StringRef SymName;
-              Symbol.getName(SymName);
-              const char *name = SymName.data();
+              ErrorOr<StringRef> SymName = Symbol.getName();
+              if (std::error_code EC = SymName.getError())
+                report_fatal_error(EC.message());
+              const char *name = SymName->data();
               return name;
             }
           }
@@ -2417,10 +2422,9 @@ static const char *get_pointer_32(uint32_t Address, uint32_t &offset,
 // for the specified section offset in the specified section reference.
 // If no relocation information is found and a non-zero ReferenceValue for the
 // symbol is passed, look up that address in the info's AddrMap.
-static const char *
-get_symbol_64(uint32_t sect_offset, SectionRef S, DisassembleInfo *info,
-              uint64_t &n_value,
-              uint64_t ReferenceValue = UnknownAddressOrSize) {
+static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
+                                 DisassembleInfo *info, uint64_t &n_value,
+                                 uint64_t ReferenceValue = UnknownAddress) {
   n_value = 0;
   if (!info->verbose)
     return nullptr;
@@ -2432,8 +2436,7 @@ get_symbol_64(uint32_t sect_offset, SectionRef S, DisassembleInfo *info,
   bool isExtern = false;
   SymbolRef Symbol;
   for (const RelocationRef &Reloc : S.relocations()) {
-    uint64_t RelocOffset;
-    Reloc.getOffset(RelocOffset);
+    uint64_t RelocOffset = Reloc.getOffset();
     if (RelocOffset == sect_offset) {
       Rel = Reloc.getRawDataRefImpl();
       RE = info->O->getRelocation(Rel);
@@ -2454,12 +2457,14 @@ get_symbol_64(uint32_t sect_offset, SectionRef S, DisassembleInfo *info,
   const char *SymbolName = nullptr;
   if (reloc_found && isExtern) {
     Symbol.getAddress(n_value);
-    if (n_value == UnknownAddressOrSize)
+    if (n_value == UnknownAddress)
       n_value = 0;
-    StringRef name;
-    Symbol.getName(name);
-    if (!name.empty()) {
-      SymbolName = name.data();
+    ErrorOr<StringRef> NameOrError = Symbol.getName();
+    if (std::error_code EC = NameOrError.getError())
+      report_fatal_error(EC.message());
+    StringRef Name = *NameOrError;
+    if (!Name.empty()) {
+      SymbolName = Name.data();
       return SymbolName;
     }
   }
@@ -2475,7 +2480,7 @@ get_symbol_64(uint32_t sect_offset, SectionRef S, DisassembleInfo *info,
 
   // We did not find an external relocation entry so look up the ReferenceValue
   // as an address of a symbol and if found return that symbol's name.
-  if (ReferenceValue != UnknownAddressOrSize)
+  if (ReferenceValue != UnknownAddress)
     SymbolName = GuessSymbolName(ReferenceValue, info->AddrMap);
 
   return SymbolName;
@@ -5614,8 +5619,7 @@ static const char *GuessLiteralPointer(uint64_t ReferenceValue,
   bool isExtern = false;
   SymbolRef Symbol;
   for (const RelocationRef &Reloc : info->S.relocations()) {
-    uint64_t RelocOffset;
-    Reloc.getOffset(RelocOffset);
+    uint64_t RelocOffset = Reloc.getOffset();
     if (RelocOffset == sect_offset) {
       Rel = Reloc.getRawDataRefImpl();
       RE = info->O->getRelocation(Rel);
@@ -6109,8 +6113,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
     // Parse relocations.
     std::vector<std::pair<uint64_t, SymbolRef>> Relocs;
     for (const RelocationRef &Reloc : Sections[SectIdx].relocations()) {
-      uint64_t RelocOffset;
-      Reloc.getOffset(RelocOffset);
+      uint64_t RelocOffset = Reloc.getOffset();
       uint64_t SectionAddress = Sections[SectIdx].getAddress();
       RelocOffset -= SectionAddress;
 
@@ -6125,14 +6128,15 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
     SymbolAddressMap AddrMap;
     bool DisSymNameFound = false;
     for (const SymbolRef &Symbol : MachOOF->symbols()) {
-      SymbolRef::Type ST;
-      Symbol.getType(ST);
+      SymbolRef::Type ST = Symbol.getType();
       if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
           ST == SymbolRef::ST_Other) {
         uint64_t Address;
         Symbol.getAddress(Address);
-        StringRef SymName;
-        Symbol.getName(SymName);
+        ErrorOr<StringRef> SymNameOrErr = Symbol.getName();
+        if (std::error_code EC = SymNameOrErr.getError())
+          report_fatal_error(EC.message());
+        StringRef SymName = *SymNameOrErr;
         AddrMap[Address] = SymName;
         if (!DisSymName.empty() && DisSymName == SymName)
           DisSymNameFound = true;
@@ -6171,11 +6175,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
 
     // Disassemble symbol by symbol.
     for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
-      StringRef SymName;
-      Symbols[SymIdx].getName(SymName);
+      ErrorOr<StringRef> SymNameOrErr = Symbols[SymIdx].getName();
+      if (std::error_code EC = SymNameOrErr.getError())
+        report_fatal_error(EC.message());
+      StringRef SymName = *SymNameOrErr;
 
-      SymbolRef::Type ST;
-      Symbols[SymIdx].getType(ST);
+      SymbolRef::Type ST = Symbols[SymIdx].getType();
       if (ST != SymbolRef::ST_Function)
         continue;
 
@@ -6200,8 +6205,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
       uint64_t NextSym = 0;
       uint64_t NextSymIdx = SymIdx + 1;
       while (Symbols.size() > NextSymIdx) {
-        SymbolRef::Type NextSymType;
-        Symbols[NextSymIdx].getType(NextSymType);
+        SymbolRef::Type NextSymType = Symbols[NextSymIdx].getType();
         if (NextSymType == SymbolRef::ST_Function) {
           containsNextSym =
               Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]);
@@ -6437,7 +6441,10 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
                                       const RelocationRef &Reloc, uint64_t Addr,
                                       StringRef &Name, uint64_t &Addend) {
   if (Reloc.getSymbol() != Obj->symbol_end()) {
-    Reloc.getSymbol()->getName(Name);
+    ErrorOr<StringRef> NameOrErr = Reloc.getSymbol()->getName();
+    if (std::error_code EC = NameOrErr.getError())
+      report_fatal_error(EC.message());
+    Name = *NameOrErr;
     Addend = Addr;
     return;
   }
@@ -6463,7 +6470,10 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
   Sym->second.getSection(SymSection);
   if (RelocSection == *SymSection) {
     // There's a valid symbol in the same section before this reference.
-    Sym->second.getName(Name);
+    ErrorOr<StringRef> NameOrErr = Sym->second.getName();
+    if (std::error_code EC = NameOrErr.getError())
+      report_fatal_error(EC.message());
+    Name = *NameOrErr;
     Addend = Addr - Sym->first;
     return;
   }
@@ -6480,7 +6490,7 @@ static void printUnwindRelocDest(const MachOObjectFile *Obj,
   StringRef Name;
   uint64_t Addend;
 
-  if (!Reloc.getObjectFile())
+  if (!Reloc.getObject())
     return;
 
   findUnwindRelocNameAddend(Obj, Symbols, Reloc, Addr, Name, Addend);
@@ -6516,8 +6526,7 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj,
   // Next we need to look at the relocations to find out what objects are
   // actually being referred to.
   for (const RelocationRef &Reloc : CompactUnwind.relocations()) {
-    uint64_t RelocAddress;
-    Reloc.getOffset(RelocAddress);
+    uint64_t RelocAddress = Reloc.getOffset();
 
     uint32_t EntryIdx = RelocAddress / EntrySize;
     uint32_t OffsetInEntry = RelocAddress - EntryIdx * EntrySize;
@@ -6553,7 +6562,7 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj,
            << format("0x%08" PRIx32, Entry.CompactEncoding) << '\n';
 
     // 4. The personality function, if present.
-    if (Entry.PersonalityReloc.getObjectFile()) {
+    if (Entry.PersonalityReloc.getObject()) {
       outs() << "    personality function: "
              << format("0x%" PRIx64, Entry.PersonalityAddr) << ' ';
       printUnwindRelocDest(Obj, Symbols, Entry.PersonalityReloc,
@@ -6562,7 +6571,7 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj,
     }
 
     // 5. This entry's language-specific data area.
-    if (Entry.LSDAReloc.getObjectFile()) {
+    if (Entry.LSDAReloc.getObject()) {
       outs() << "    LSDA:                 " << format("0x%" PRIx64,
                                                        Entry.LSDAAddr) << ' ';
       printUnwindRelocDest(Obj, Symbols, Entry.LSDAReloc, Entry.LSDAAddr);
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 1152a15..7869818 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -17,9 +17,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-objdump.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/FaultMaps.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
@@ -153,6 +155,9 @@ cl::opt<bool>
     llvm::PrintImmHex("print-imm-hex",
                       cl::desc("Use hex format for immediate values"));
 
+cl::opt<bool> PrintFaultMaps("fault-map-section",
+                             cl::desc("Display contents of faultmap section"));
+
 static StringRef ToolName;
 static int ReturnValue = EXIT_SUCCESS;
 
@@ -207,9 +212,8 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
 }
 
 bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
-  uint64_t a_addr, b_addr;
-  if (error(a.getOffset(a_addr))) return false;
-  if (error(b.getOffset(b_addr))) return false;
+  uint64_t a_addr = a.getOffset();
+  uint64_t b_addr = b.getOffset();
   return a_addr < b_addr;
 }
 
@@ -294,60 +298,68 @@ PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
 }
 
 template <class ELFT>
-static const typename ELFObjectFile<ELFT>::Elf_Rel *
-getRel(const ELFFile<ELFT> &EF, DataRefImpl Rel) {
-  typedef typename ELFObjectFile<ELFT>::Elf_Rel Elf_Rel;
-  return EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b);
-}
-
-template <class ELFT>
-static const typename ELFObjectFile<ELFT>::Elf_Rela *
-getRela(const ELFFile<ELFT> &EF, DataRefImpl Rela) {
-  typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
-  return EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b);
-}
-
-template <class ELFT>
 static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
                                                 DataRefImpl Rel,
                                                 SmallVectorImpl<char> &Result) {
   typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
   typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
+  typedef typename ELFObjectFile<ELFT>::Elf_Rel Elf_Rel;
+  typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
+
   const ELFFile<ELFT> &EF = *Obj->getELFFile();
 
-  const Elf_Shdr *sec = EF.getSection(Rel.d.a);
+  ErrorOr<const Elf_Shdr *> SecOrErr = EF.getSection(Rel.d.a);
+  if (std::error_code EC = SecOrErr.getError())
+    return EC;
+  const Elf_Shdr *Sec = *SecOrErr;
+  ErrorOr<const Elf_Shdr *> SymTabOrErr = EF.getSection(Sec->sh_link);
+  if (std::error_code EC = SymTabOrErr.getError())
+    return EC;
+  const Elf_Shdr *SymTab = *SymTabOrErr;
+  assert(SymTab->sh_type == ELF::SHT_SYMTAB ||
+         SymTab->sh_type == ELF::SHT_DYNSYM);
+  ErrorOr<const Elf_Shdr *> StrTabSec = EF.getSection(SymTab->sh_link);
+  if (std::error_code EC = StrTabSec.getError())
+    return EC;
+  ErrorOr<StringRef> StrTabOrErr = EF.getStringTable(*StrTabSec);
+  if (std::error_code EC = StrTabOrErr.getError())
+    return EC;
+  StringRef StrTab = *StrTabOrErr;
   uint8_t type;
   StringRef res;
   int64_t addend = 0;
   uint16_t symbol_index = 0;
-  switch (sec->sh_type) {
+  switch (Sec->sh_type) {
   default:
     return object_error::parse_failed;
   case ELF::SHT_REL: {
-    type = getRel(EF, Rel)->getType(EF.isMips64EL());
-    symbol_index = getRel(EF, Rel)->getSymbol(EF.isMips64EL());
+    const Elf_Rel *ERel = Obj->getRel(Rel);
+    type = ERel->getType(EF.isMips64EL());
+    symbol_index = ERel->getSymbol(EF.isMips64EL());
     // TODO: Read implicit addend from section data.
     break;
   }
   case ELF::SHT_RELA: {
-    type = getRela(EF, Rel)->getType(EF.isMips64EL());
-    symbol_index = getRela(EF, Rel)->getSymbol(EF.isMips64EL());
-    addend = getRela(EF, Rel)->r_addend;
+    const Elf_Rela *ERela = Obj->getRela(Rel);
+    type = ERela->getType(EF.isMips64EL());
+    symbol_index = ERela->getSymbol(EF.isMips64EL());
+    addend = ERela->r_addend;
     break;
   }
   }
   const Elf_Sym *symb =
-      EF.template getEntry<Elf_Sym>(sec->sh_link, symbol_index);
+      EF.template getEntry<Elf_Sym>(Sec->sh_link, symbol_index);
   StringRef Target;
-  const Elf_Shdr *SymSec = EF.getSection(symb);
+  ErrorOr<const Elf_Shdr *> SymSec = EF.getSection(symb);
+  if (std::error_code EC = SymSec.getError())
+    return EC;
   if (symb->getType() == ELF::STT_SECTION) {
-    ErrorOr<StringRef> SecName = EF.getSectionName(SymSec);
+    ErrorOr<StringRef> SecName = EF.getSectionName(*SymSec);
     if (std::error_code EC = SecName.getError())
       return EC;
     Target = *SecName;
   } else {
-    ErrorOr<StringRef> SymName =
-        EF.getSymbolName(EF.getSection(sec->sh_link), symb);
+    ErrorOr<StringRef> SymName = symb->getName(StrTab);
     if (!SymName)
       return SymName.getError();
     Target = *SymName;
@@ -421,9 +433,10 @@ static std::error_code getRelocationValueString(const COFFObjectFile *Obj,
                                                 const RelocationRef &Rel,
                                                 SmallVectorImpl<char> &Result) {
   symbol_iterator SymI = Rel.getSymbol();
-  StringRef SymName;
-  if (std::error_code EC = SymI->getName(SymName))
+  ErrorOr<StringRef> SymNameOrErr = SymI->getName();
+  if (std::error_code EC = SymNameOrErr.getError())
     return EC;
+  StringRef SymName = *SymNameOrErr;
   Result.append(SymName.begin(), SymName.end());
   return std::error_code();
 }
@@ -443,15 +456,15 @@ static void printRelocationTargetName(const MachOObjectFile *O,
     for (const SymbolRef &Symbol : O->symbols()) {
       std::error_code ec;
       uint64_t Addr;
-      StringRef Name;
+      ErrorOr<StringRef> Name = Symbol.getName();
 
       if ((ec = Symbol.getAddress(Addr)))
         report_fatal_error(ec.message());
       if (Addr != Val)
         continue;
-      if ((ec = Symbol.getName(Name)))
-        report_fatal_error(ec.message());
-      fmt << Name;
+      if (std::error_code EC = Name.getError())
+        report_fatal_error(EC.message());
+      fmt << *Name;
       return;
     }
 
@@ -481,7 +494,9 @@ static void printRelocationTargetName(const MachOObjectFile *O,
   if (isExtern) {
     symbol_iterator SI = O->symbol_begin();
     advance(SI, Val);
-    SI->getName(S);
+    ErrorOr<StringRef> SOrErr = SI->getName();
+    if (!error(SOrErr.getError()))
+      S = *SOrErr;
   } else {
     section_iterator SI = O->section_begin();
     // Adjust for the fact that sections are 1-indexed.
@@ -672,7 +687,7 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
 
 static std::error_code getRelocationValueString(const RelocationRef &Rel,
                                                 SmallVectorImpl<char> &Result) {
-  const ObjectFile *Obj = Rel.getObjectFile();
+  const ObjectFile *Obj = Rel.getObject();
   if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj))
     return getRelocationValueString(ELF, Rel, Result);
   if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
@@ -681,6 +696,39 @@ static std::error_code getRelocationValueString(const RelocationRef &Rel,
   return getRelocationValueString(MachO, Rel, Result);
 }
 
+/// @brief Indicates whether this relocation should hidden when listing
+/// relocations, usually because it is the trailing part of a multipart
+/// relocation that will be printed as part of the leading relocation.
+static bool getHidden(RelocationRef RelRef) {
+  const ObjectFile *Obj = RelRef.getObject();
+  auto *MachO = dyn_cast<MachOObjectFile>(Obj);
+  if (!MachO)
+    return false;
+
+  unsigned Arch = MachO->getArch();
+  DataRefImpl Rel = RelRef.getRawDataRefImpl();
+  uint64_t Type = MachO->getRelocationType(Rel);
+
+  // On arches that use the generic relocations, GENERIC_RELOC_PAIR
+  // is always hidden.
+  if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc) {
+    if (Type == MachO::GENERIC_RELOC_PAIR)
+      return true;
+  } else if (Arch == Triple::x86_64) {
+    // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
+    // an X86_64_RELOC_SUBTRACTOR.
+    if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) {
+      DataRefImpl RelPrev = Rel;
+      RelPrev.d.a--;
+      uint64_t PrevType = MachO->getRelocationType(RelPrev);
+      if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR)
+        return true;
+    }
+  }
+
+  return false;
+}
+
 static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
   const Target *TheTarget = getTarget(Obj);
   // getTarget() will have already issued a diagnostic if necessary, so
@@ -779,16 +827,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
         uint64_t Address;
         if (error(Symbol.getAddress(Address)))
           break;
-        if (Address == UnknownAddressOrSize)
+        if (Address == UnknownAddress)
           continue;
         Address -= SectionAddr;
         if (Address >= SectSize)
           continue;
 
-        StringRef Name;
-        if (error(Symbol.getName(Name)))
+        ErrorOr<StringRef> Name = Symbol.getName();
+        if (error(Name.getError()))
           break;
-        Symbols.push_back(std::make_pair(Address, Name));
+        Symbols.push_back(std::make_pair(Address, *Name));
       }
     }
 
@@ -877,19 +925,17 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
 
         // Print relocation for instruction.
         while (rel_cur != rel_end) {
-          bool hidden = false;
-          uint64_t addr;
+          bool hidden = getHidden(*rel_cur);
+          uint64_t addr = rel_cur->getOffset();
           SmallString<16> name;
           SmallString<32> val;
 
           // If this relocation is hidden, skip it.
-          if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
           if (hidden) goto skip_print_rel;
 
-          if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
           // Stop when rel_cur's address is past the current instruction.
           if (addr >= Index + Size) break;
-          if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
+          rel_cur->getTypeName(name);
           if (error(getRelocationValueString(*rel_cur, val)))
             goto skip_print_rel;
           outs() << format(Fmt.data(), SectionAddr + addr) << name
@@ -919,18 +965,13 @@ void llvm::PrintRelocations(const ObjectFile *Obj) {
       continue;
     outs() << "RELOCATION RECORDS FOR [" << secname << "]:\n";
     for (const RelocationRef &Reloc : Section.relocations()) {
-      bool hidden;
-      uint64_t address;
+      bool hidden = getHidden(Reloc);
+      uint64_t address = Reloc.getOffset();
       SmallString<32> relocname;
       SmallString<32> valuestr;
-      if (error(Reloc.getHidden(hidden)))
-        continue;
       if (hidden)
         continue;
-      if (error(Reloc.getTypeName(relocname)))
-        continue;
-      if (error(Reloc.getOffset(address)))
-        continue;
+      Reloc.getTypeName(relocname);
       if (error(getRelocationValueString(Reloc, valuestr)))
         continue;
       outs() << format(Fmt.data(), address) << " " << relocname << " "
@@ -1073,21 +1114,21 @@ void llvm::PrintSymbolTable(const ObjectFile *o) {
   }
   for (const SymbolRef &Symbol : o->symbols()) {
     uint64_t Address;
-    SymbolRef::Type Type;
+    SymbolRef::Type Type = Symbol.getType();
     uint32_t Flags = Symbol.getFlags();
     section_iterator Section = o->section_end();
     if (error(Symbol.getAddress(Address)))
       continue;
-    if (error(Symbol.getType(Type)))
-      continue;
-    uint64_t Size = Symbol.getSize();
     if (error(Symbol.getSection(Section)))
       continue;
     StringRef Name;
     if (Type == SymbolRef::ST_Debug && Section != o->section_end()) {
       Section->getName(Name);
-    } else if (error(Symbol.getName(Name))) {
-      continue;
+    } else {
+      ErrorOr<StringRef> NameOrErr = Symbol.getName();
+      if (error(NameOrErr.getError()))
+        continue;
+      Name = *NameOrErr;
     }
 
     bool Global = Flags & SymbolRef::SF_Global;
@@ -1096,15 +1137,11 @@ void llvm::PrintSymbolTable(const ObjectFile *o) {
     bool Common = Flags & SymbolRef::SF_Common;
     bool Hidden = Flags & SymbolRef::SF_Hidden;
 
-    if (Common) {
-      uint32_t Alignment = Symbol.getAlignment();
-      Address = Size;
-      Size = Alignment;
-    }
-    if (Address == UnknownAddressOrSize)
+    if (Common)
+      Address = Symbol.getCommonSize();
+
+    if (Address == UnknownAddress)
       Address = 0;
-    if (Size == UnknownAddressOrSize)
-      Size = 0;
     char GlobLoc = ' ';
     if (Type != SymbolRef::ST_Unknown)
       GlobLoc = Global ? 'g' : 'l';
@@ -1146,8 +1183,14 @@ void llvm::PrintSymbolTable(const ObjectFile *o) {
         SectionName = "";
       outs() << SectionName;
     }
-    outs() << '\t'
-           << format("%08" PRIx64 " ", Size);
+
+    outs() << '\t';
+    if (Common || isa<ELFObjectFileBase>(o)) {
+      uint64_t Val =
+          Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
+      outs() << format("\t %08" PRIx64 " ", Val);
+    }
+
     if (Hidden) {
       outs() << ".hidden ";
     }
@@ -1226,6 +1269,49 @@ void llvm::printWeakBindTable(const ObjectFile *o) {
   }
 }
 
+static void printFaultMaps(const ObjectFile *Obj) {
+  const char *FaultMapSectionName = nullptr;
+
+  if (isa<ELFObjectFileBase>(Obj)) {
+    FaultMapSectionName = ".llvm_faultmaps";
+  } else if (isa<MachOObjectFile>(Obj)) {
+    FaultMapSectionName = "__llvm_faultmaps";
+  } else {
+    errs() << "This operation is only currently supported "
+              "for ELF and Mach-O executable files.\n";
+    return;
+  }
+
+  Optional<object::SectionRef> FaultMapSection;
+
+  for (auto Sec : Obj->sections()) {
+    StringRef Name;
+    Sec.getName(Name);
+    if (Name == FaultMapSectionName) {
+      FaultMapSection = Sec;
+      break;
+    }
+  }
+
+  outs() << "FaultMap table:\n";
+
+  if (!FaultMapSection.hasValue()) {
+    outs() << "<not found>\n";
+    return;
+  }
+
+  StringRef FaultMapContents;
+  if (error(FaultMapSection.getValue().getContents(FaultMapContents))) {
+    errs() << "Could not read the " << FaultMapContents << " section!\n";
+    return;
+  }
+
+  FaultMapParser FMP(FaultMapContents.bytes_begin(),
+                     FaultMapContents.bytes_end());
+
+  outs() << FMP;
+}
+
 static void printPrivateFileHeader(const ObjectFile *o) {
   if (o->isELF()) {
     printELFFileHeader(o);
@@ -1265,6 +1351,8 @@ static void DumpObject(const ObjectFile *o) {
     printLazyBindTable(o);
   if (WeakBind)
     printWeakBindTable(o);
+  if (PrintFaultMaps)
+    printFaultMaps(o);
 }
 
 /// @brief Dump each object file in \a a;
@@ -1362,7 +1450,8 @@ int main(int argc, char **argv) {
       && !(DylibsUsed && MachOOpt)
       && !(DylibId && MachOOpt)
       && !(ObjcMetaData && MachOOpt)
-      && !(DumpSections.size() != 0 && MachOOpt)) {
+      && !(DumpSections.size() != 0 && MachOOpt)
+      && !PrintFaultMaps) {
     cl::PrintHelpMessage();
     return 2;
   }
diff --git a/tools/llvm-readobj/ARMEHABIPrinter.h b/tools/llvm-readobj/ARMEHABIPrinter.h
index b15421d..dd2490d 100644
--- a/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -312,8 +312,6 @@ class PrinterContext {
   typedef typename object::ELFFile<ET>::Elf_Shdr Elf_Shdr;
 
   typedef typename object::ELFFile<ET>::Elf_Rel_Iter Elf_Rel_iterator;
-  typedef typename object::ELFFile<ET>::Elf_Sym_Iter Elf_Sym_iterator;
-  typedef typename object::ELFFile<ET>::Elf_Shdr_Iter Elf_Shdr_iterator;
 
   static const size_t IndexTableEntrySize;
 
@@ -344,13 +342,13 @@ template <typename ET>
 const size_t PrinterContext<ET>::IndexTableEntrySize = 8;
 
 template <typename ET>
-ErrorOr<StringRef> PrinterContext<ET>::FunctionAtAddress(unsigned Section,
-                                                         uint64_t Address) const {
-  for (Elf_Sym_iterator SI = ELF->begin_symbols(), SE = ELF->end_symbols();
-       SI != SE; ++SI)
-    if (SI->st_shndx == Section && SI->st_value == Address &&
-        SI->getType() == ELF::STT_FUNC)
-      return ELF->getSymbolName(SI);
+ErrorOr<StringRef>
+PrinterContext<ET>::FunctionAtAddress(unsigned Section,
+                                      uint64_t Address) const {
+  for (const Elf_Sym &Sym : ELF->symbols())
+    if (Sym.st_shndx == Section && Sym.st_value == Address &&
+        Sym.getType() == ELF::STT_FUNC)
+      return ELF->getSymbolName(&Sym, false);
   return readobj_error::unknown_symbol;
 }
 
@@ -366,10 +364,9 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
   /// handling table.  Use this symbol to recover the actual exception handling
   /// table.
 
-  for (Elf_Shdr_iterator SI = ELF->begin_sections(), SE = ELF->end_sections();
-       SI != SE; ++SI) {
-    if (SI->sh_type == ELF::SHT_REL && SI->sh_info == IndexSectionIndex) {
-      for (Elf_Rel_iterator RI = ELF->begin_rel(&*SI), RE = ELF->end_rel(&*SI);
+  for (const Elf_Shdr &Sec : ELF->sections()) {
+    if (Sec.sh_type == ELF::SHT_REL && Sec.sh_info == IndexSectionIndex) {
+      for (Elf_Rel_iterator RI = ELF->rel_begin(&Sec), RE = ELF->rel_end(&Sec);
            RI != RE; ++RI) {
         if (RI->r_offset == static_cast<unsigned>(IndexTableOffset)) {
           typename object::ELFFile<ET>::Elf_Rela RelA;
@@ -378,9 +375,12 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
           RelA.r_addend = 0;
 
           std::pair<const Elf_Shdr *, const Elf_Sym *> Symbol =
-            ELF->getRelocationSymbol(&(*SI), &RelA);
+              ELF->getRelocationSymbol(&Sec, &RelA);
 
-          return ELF->getSection(Symbol.second);
+          ErrorOr<const Elf_Shdr *> Ret = ELF->getSection(Symbol.second);
+          if (std::error_code EC = Ret.getError())
+            report_fatal_error(EC.message());
+          return *Ret;
         }
       }
     }
@@ -528,20 +528,18 @@ void PrinterContext<ET>::PrintUnwindInformation() const {
   DictScope UI(SW, "UnwindInformation");
 
   int SectionIndex = 0;
-  for (Elf_Shdr_iterator SI = ELF->begin_sections(), SE = ELF->end_sections();
-       SI != SE; ++SI, ++SectionIndex) {
-    if (SI->sh_type == ELF::SHT_ARM_EXIDX) {
-      const Elf_Shdr *IT = &(*SI);
-
+  for (const Elf_Shdr &Sec : ELF->sections()) {
+    if (Sec.sh_type == ELF::SHT_ARM_EXIDX) {
       DictScope UIT(SW, "UnwindIndexTable");
 
       SW.printNumber("SectionIndex", SectionIndex);
-      if (ErrorOr<StringRef> SectionName = ELF->getSectionName(IT))
+      if (ErrorOr<StringRef> SectionName = ELF->getSectionName(&Sec))
         SW.printString("SectionName", *SectionName);
-      SW.printHex("SectionOffset", IT->sh_offset);
+      SW.printHex("SectionOffset", Sec.sh_offset);
 
-      PrintIndexTable(SectionIndex, IT);
+      PrintIndexTable(SectionIndex, &Sec);
     }
+    ++SectionIndex;
   }
 }
 }
diff --git a/tools/llvm-readobj/ARMWinEHPrinter.cpp b/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 62252fc..a1ea79f 100644
--- a/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -198,13 +198,8 @@ Decoder::getSectionContaining(const COFFObjectFile &COFF, uint64_t VA) {
 ErrorOr<object::SymbolRef> Decoder::getSymbol(const COFFObjectFile &COFF,
                                               uint64_t VA, bool FunctionOnly) {
   for (const auto &Symbol : COFF.symbols()) {
-    if (FunctionOnly) {
-      SymbolRef::Type Type;
-      if (std::error_code EC = Symbol.getType(Type))
-        return EC;
-      if (Type != SymbolRef::ST_Function)
-        continue;
-    }
+    if (FunctionOnly && Symbol.getType() != SymbolRef::ST_Function)
+      continue;
 
     uint64_t Address;
     if (std::error_code EC = Symbol.getAddress(Address))
@@ -219,9 +214,7 @@ ErrorOr<SymbolRef> Decoder::getRelocatedSymbol(const COFFObjectFile &,
                                                const SectionRef &Section,
                                                uint64_t Offset) {
   for (const auto &Relocation : Section.relocations()) {
-    uint64_t RelocationOffset;
-    if (auto Error = Relocation.getOffset(RelocationOffset))
-      return Error;
+    uint64_t RelocationOffset = Relocation.getOffset();
     if (RelocationOffset == Offset)
       return *Relocation.getSymbol();
   }
@@ -574,12 +567,12 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
     if (!Symbol)
       Symbol = getSymbol(COFF, Address, /*FunctionOnly=*/true);
 
-    StringRef Name;
-    if (Symbol)
-      Symbol->getName(Name);
+    ErrorOr<StringRef> Name = Symbol->getName();
+    if (std::error_code EC = Name.getError())
+      report_fatal_error(EC.message());
 
     ListScope EHS(SW, "ExceptionHandler");
-    SW.printString("Routine", formatSymbol(Name, Address));
+    SW.printString("Routine", formatSymbol(*Name, Address));
     SW.printHex("Parameter", Parameter);
   }
 
@@ -608,7 +601,10 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
   StringRef FunctionName;
   uint64_t FunctionAddress;
   if (Function) {
-    Function->getName(FunctionName);
+    ErrorOr<StringRef> FunctionNameOrErr = Function->getName();
+    if (std::error_code EC = FunctionNameOrErr.getError())
+      report_fatal_error(EC.message());
+    FunctionName = *FunctionNameOrErr;
     Function->getAddress(FunctionAddress);
   } else {
     const pe32_header *PEHeader;
@@ -620,13 +616,14 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
   SW.printString("Function", formatSymbol(FunctionName, FunctionAddress));
 
   if (XDataRecord) {
-    StringRef Name;
-    uint64_t Address;
+    ErrorOr<StringRef> Name = XDataRecord->getName();
+    if (std::error_code EC = Name.getError())
+      report_fatal_error(EC.message());
 
-    XDataRecord->getName(Name);
+    uint64_t Address;
     XDataRecord->getAddress(Address);
 
-    SW.printString("ExceptionRecord", formatSymbol(Name, Address));
+    SW.printString("ExceptionRecord", formatSymbol(*Name, Address));
 
     section_iterator SI = COFF.section_end();
     if (XDataRecord->getSection(SI))
@@ -665,7 +662,10 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
   StringRef FunctionName;
   uint64_t FunctionAddress;
   if (Function) {
-    Function->getName(FunctionName);
+    ErrorOr<StringRef> FunctionNameOrErr = Function->getName();
+    if (std::error_code EC = FunctionNameOrErr.getError())
+      report_fatal_error(EC.message());
+    FunctionName = *FunctionNameOrErr;
     Function->getAddress(FunctionAddress);
   } else {
     const pe32_header *PEHeader;
diff --git a/tools/llvm-readobj/CMakeLists.txt b/tools/llvm-readobj/CMakeLists.txt
index 30f336f..87407a2 100644
--- a/tools/llvm-readobj/CMakeLists.txt
+++ b/tools/llvm-readobj/CMakeLists.txt
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
   Object
   Support
   )
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 4a1d5da..f5effe2 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -16,6 +16,7 @@
 #include "ARMWinEHPrinter.h"
 #include "Error.h"
 #include "ObjDumper.h"
+#include "StackMapPrinter.h"
 #include "StreamWriter.h"
 #include "Win64EHDumper.h"
 #include "llvm/ADT/DenseMap.h"
@@ -60,7 +61,7 @@ public:
   void printCOFFExports() override;
   void printCOFFDirectives() override;
   void printCOFFBaseReloc() override;
-
+  void printStackMap() const override;
 private:
   void printSymbol(const SymbolRef &Sym);
   void printRelocation(const SectionRef &Section, const RelocationRef &Reloc);
@@ -120,9 +121,7 @@ std::error_code COFFDumper::resolveSymbol(const coff_section *Section,
                                           uint64_t Offset, SymbolRef &Sym) {
   const auto &Relocations = RelocMap[Section];
   for (const auto &Relocation : Relocations) {
-    uint64_t RelocationOffset;
-    if (std::error_code EC = Relocation.getOffset(RelocationOffset))
-      return EC;
+    uint64_t RelocationOffset = Relocation.getOffset();
 
     if (RelocationOffset == Offset) {
       Sym = *Relocation.getSymbol();
@@ -140,8 +139,10 @@ std::error_code COFFDumper::resolveSymbolName(const coff_section *Section,
   SymbolRef Symbol;
   if (std::error_code EC = resolveSymbol(Section, Offset, Symbol))
     return EC;
-  if (std::error_code EC = Symbol.getName(Name))
+  ErrorOr<StringRef> NameOrErr = Symbol.getName();
+  if (std::error_code EC = NameOrErr.getError())
     return EC;
+  Name = *NameOrErr;
   return std::error_code();
 }
 
@@ -804,19 +805,18 @@ void COFFDumper::printRelocations() {
 
 void COFFDumper::printRelocation(const SectionRef &Section,
                                  const RelocationRef &Reloc) {
-  uint64_t Offset;
-  uint64_t RelocType;
+  uint64_t Offset = Reloc.getOffset();
+  uint64_t RelocType = Reloc.getType();
   SmallString<32> RelocName;
   StringRef SymbolName;
-  if (error(Reloc.getOffset(Offset)))
-    return;
-  if (error(Reloc.getType(RelocType)))
-    return;
-  if (error(Reloc.getTypeName(RelocName)))
-    return;
+  Reloc.getTypeName(RelocName);
   symbol_iterator Symbol = Reloc.getSymbol();
-  if (Symbol != Obj->symbol_end() && error(Symbol->getName(SymbolName)))
-    return;
+  if (Symbol != Obj->symbol_end()) {
+    ErrorOr<StringRef> SymbolNameOrErr = Symbol->getName();
+    if (error(SymbolNameOrErr.getError()))
+      return;
+    SymbolName = *SymbolNameOrErr;
+  }
 
   if (opts::ExpandRelocs) {
     DictScope Group(W, "Relocation");
@@ -1140,3 +1140,32 @@ void COFFDumper::printCOFFBaseReloc() {
     W.printHex("Address", RVA);
   }
 }
+
+void COFFDumper::printStackMap() const {
+  object::SectionRef StackMapSection;
+  for (auto Sec : Obj->sections()) {
+    StringRef Name;
+    Sec.getName(Name);
+    if (Name == ".llvm_stackmaps") {
+      StackMapSection = Sec;
+      break;
+    }
+  }
+
+  if (StackMapSection == object::SectionRef())
+    return;
+
+  StringRef StackMapContents;
+  StackMapSection.getContents(StackMapContents);
+  ArrayRef<uint8_t> StackMapContentsArray(
+      reinterpret_cast<const uint8_t*>(StackMapContents.data()),
+      StackMapContents.size());
+
+  if (Obj->isLittleEndian())
+    prettyPrintStackMap(
+                      llvm::outs(),
+                      StackMapV1Parser<support::little>(StackMapContentsArray));
+  else
+    prettyPrintStackMap(llvm::outs(),
+                        StackMapV1Parser<support::big>(StackMapContentsArray));
+}
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 99969fd..a4b25ef 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -17,6 +17,7 @@
 #include "ARMEHABIPrinter.h"
 #include "Error.h"
 #include "ObjDumper.h"
+#include "StackMapPrinter.h"
 #include "StreamWriter.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallString.h"
@@ -47,6 +48,7 @@ public:
   void printFileHeaders() override;
   void printSections() override;
   void printRelocations() override;
+  void printDynamicRelocations() override;
   void printSymbols() override;
   void printDynamicSymbols() override;
   void printUnwindInfo() override;
@@ -60,12 +62,14 @@ public:
   void printMipsABIFlags() override;
   void printMipsReginfo() override;
 
+  void printStackMap() const override;
+
 private:
   typedef ELFFile<ELFT> ELFO;
   typedef typename ELFO::Elf_Shdr Elf_Shdr;
   typedef typename ELFO::Elf_Sym Elf_Sym;
 
-  void printSymbol(typename ELFO::Elf_Sym_Iter Symbol);
+  void printSymbol(const Elf_Sym *Symbol, bool IsDynamic);
 
   void printRelocations(const Elf_Shdr *Sec);
   void printRelocation(const Elf_Shdr *Sec, typename ELFO::Elf_Rela Rel);
@@ -119,9 +123,10 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
 
 template <typename ELFO>
 static std::string getFullSymbolName(const ELFO &Obj,
-                                     typename ELFO::Elf_Sym_Iter Symbol) {
-  StringRef SymbolName = errorOrDefault(Obj.getSymbolName(Symbol));
-  if (!Symbol.isDynamic())
+                                     const typename ELFO::Elf_Sym *Symbol,
+                                     bool IsDynamic) {
+  StringRef SymbolName = errorOrDefault(Obj.getSymbolName(Symbol, IsDynamic));
+  if (!IsDynamic)
     return SymbolName;
 
   std::string FullSymbolName(SymbolName);
@@ -139,7 +144,7 @@ static std::string getFullSymbolName(const ELFO &Obj,
 
 template <typename ELFO>
 static void
-getSectionNameIndex(const ELFO &Obj, typename ELFO::Elf_Sym_Iter Symbol,
+getSectionNameIndex(const ELFO &Obj, const typename ELFO::Elf_Sym *Symbol,
                     StringRef &SectionName, unsigned &SectionIndex) {
   SectionIndex = Symbol->st_shndx;
   if (Symbol->isUndefined())
@@ -156,11 +161,10 @@ getSectionNameIndex(const ELFO &Obj, typename ELFO::Elf_Sym_Iter Symbol,
     SectionName = "Reserved";
   else {
     if (SectionIndex == SHN_XINDEX)
-      SectionIndex = Obj.getSymbolTableIndex(&*Symbol);
-    assert(SectionIndex != SHN_XINDEX &&
-           "getSymbolTableIndex should handle this");
-    const typename ELFO::Elf_Shdr *Sec = Obj.getSection(SectionIndex);
-    SectionName = errorOrDefault(Obj.getSectionName(Sec));
+      SectionIndex = Obj.getExtendedSymbolTableIndex(&*Symbol);
+    ErrorOr<const typename ELFO::Elf_Shdr *> Sec = Obj.getSection(SectionIndex);
+    if (!error(Sec.getError()))
+      SectionName = errorOrDefault(Obj.getSectionName(*Sec));
   }
 }
 
@@ -382,7 +386,8 @@ static const EnumEntry<unsigned> ElfMachineType[] = {
   LLVM_READOBJ_ENUM_ENT(ELF, EM_RL78         ),
   LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE5   ),
   LLVM_READOBJ_ENUM_ENT(ELF, EM_78KOR        ),
-  LLVM_READOBJ_ENUM_ENT(ELF, EM_56800EX      )
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_56800EX      ),
+  LLVM_READOBJ_ENUM_ENT(ELF, EM_AMDGPU       )
 };
 
 static const EnumEntry<unsigned> ElfSymbolBindings[] = {
@@ -574,7 +579,13 @@ void ELFDumper<ELFT>::printFileHeaders() {
       W.printEnum  ("DataEncoding", Header->e_ident[ELF::EI_DATA],
                       makeArrayRef(ElfDataEncoding));
       W.printNumber("FileVersion", Header->e_ident[ELF::EI_VERSION]);
-      W.printEnum  ("OS/ABI", Header->e_ident[ELF::EI_OSABI],
+
+      // Handle architecture specific OS/ABI values.
+      if (Header->e_machine == ELF::EM_AMDGPU &&
+          Header->e_ident[ELF::EI_OSABI] == ELF::ELFOSABI_AMDGPU_HSA)
+        W.printHex("OS/ABI", "AMDGPU_HSA", ELF::ELFOSABI_AMDGPU_HSA);
+      else
+        W.printEnum  ("OS/ABI", Header->e_ident[ELF::EI_OSABI],
                       makeArrayRef(ElfOSABI));
       W.printNumber("ABIVersion", Header->e_ident[ELF::EI_ABIVERSION]);
       W.printBinary("Unused", makeArrayRef(Header->e_ident).slice(ELF::EI_PAD));
@@ -606,46 +617,44 @@ void ELFDumper<ELFT>::printSections() {
   ListScope SectionsD(W, "Sections");
 
   int SectionIndex = -1;
-  for (typename ELFO::Elf_Shdr_Iter SecI = Obj->begin_sections(),
-                                    SecE = Obj->end_sections();
-       SecI != SecE; ++SecI) {
+  for (const typename ELFO::Elf_Shdr &Sec : Obj->sections()) {
     ++SectionIndex;
 
-    const Elf_Shdr *Section = &*SecI;
-    StringRef Name = errorOrDefault(Obj->getSectionName(Section));
+    StringRef Name = errorOrDefault(Obj->getSectionName(&Sec));
 
     DictScope SectionD(W, "Section");
     W.printNumber("Index", SectionIndex);
-    W.printNumber("Name", Name, Section->sh_name);
+    W.printNumber("Name", Name, Sec.sh_name);
     W.printHex("Type",
-               getElfSectionType(Obj->getHeader()->e_machine, Section->sh_type),
-               Section->sh_type);
-    W.printFlags ("Flags", Section->sh_flags, makeArrayRef(ElfSectionFlags));
-    W.printHex   ("Address", Section->sh_addr);
-    W.printHex   ("Offset", Section->sh_offset);
-    W.printNumber("Size", Section->sh_size);
-    W.printNumber("Link", Section->sh_link);
-    W.printNumber("Info", Section->sh_info);
-    W.printNumber("AddressAlignment", Section->sh_addralign);
-    W.printNumber("EntrySize", Section->sh_entsize);
+               getElfSectionType(Obj->getHeader()->e_machine, Sec.sh_type),
+               Sec.sh_type);
+    W.printFlags("Flags", Sec.sh_flags, makeArrayRef(ElfSectionFlags));
+    W.printHex("Address", Sec.sh_addr);
+    W.printHex("Offset", Sec.sh_offset);
+    W.printNumber("Size", Sec.sh_size);
+    W.printNumber("Link", Sec.sh_link);
+    W.printNumber("Info", Sec.sh_info);
+    W.printNumber("AddressAlignment", Sec.sh_addralign);
+    W.printNumber("EntrySize", Sec.sh_entsize);
 
     if (opts::SectionRelocations) {
       ListScope D(W, "Relocations");
-      printRelocations(Section);
+      printRelocations(&Sec);
     }
 
     if (opts::SectionSymbols) {
       ListScope D(W, "Symbols");
-      for (typename ELFO::Elf_Sym_Iter SymI = Obj->begin_symbols(),
-                                       SymE = Obj->end_symbols();
-           SymI != SymE; ++SymI) {
-        if (Obj->getSection(&*SymI) == Section)
-          printSymbol(SymI);
+      for (const typename ELFO::Elf_Sym &Sym : Obj->symbols()) {
+        ErrorOr<const Elf_Shdr *> SymSec = Obj->getSection(&Sym);
+        if (!SymSec)
+          continue;
+        if (*SymSec == &Sec)
+          printSymbol(&Sym, false);
       }
     }
 
-    if (opts::SectionData && Section->sh_type != ELF::SHT_NOBITS) {
-      ArrayRef<uint8_t> Data = errorOrDefault(Obj->getSectionContents(Section));
+    if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) {
+      ArrayRef<uint8_t> Data = errorOrDefault(Obj->getSectionContents(&Sec));
       W.printBinaryBlock("SectionData",
                          StringRef((const char *)Data.data(), Data.size()));
     }
@@ -657,32 +666,63 @@ void ELFDumper<ELFT>::printRelocations() {
   ListScope D(W, "Relocations");
 
   int SectionNumber = -1;
-  for (typename ELFO::Elf_Shdr_Iter SecI = Obj->begin_sections(),
-                                    SecE = Obj->end_sections();
-       SecI != SecE; ++SecI) {
+  for (const typename ELFO::Elf_Shdr &Sec : Obj->sections()) {
     ++SectionNumber;
 
-    if (SecI->sh_type != ELF::SHT_REL && SecI->sh_type != ELF::SHT_RELA)
+    if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA)
       continue;
 
-    StringRef Name = errorOrDefault(Obj->getSectionName(&*SecI));
+    StringRef Name = errorOrDefault(Obj->getSectionName(&Sec));
 
     W.startLine() << "Section (" << SectionNumber << ") " << Name << " {\n";
     W.indent();
 
-    printRelocations(&*SecI);
+    printRelocations(&Sec);
 
     W.unindent();
     W.startLine() << "}\n";
   }
 }
 
+template<class ELFT>
+void ELFDumper<ELFT>::printDynamicRelocations() {
+  W.startLine() << "Dynamic Relocations {\n";
+  W.indent();
+  for (typename ELFO::Elf_Rela_Iter RelI = Obj->dyn_rela_begin(),
+                                    RelE = Obj->dyn_rela_end();
+       RelI != RelE; ++RelI) {
+    SmallString<32> RelocName;
+    Obj->getRelocationTypeName(RelI->getType(Obj->isMips64EL()), RelocName);
+    StringRef SymbolName;
+    uint32_t SymIndex = RelI->getSymbol(Obj->isMips64EL());
+    const typename ELFO::Elf_Sym *Sym = Obj->dynamic_symbol_begin() + SymIndex;
+    SymbolName = errorOrDefault(Obj->getSymbolName(Sym, true));
+    if (opts::ExpandRelocs) {
+      DictScope Group(W, "Relocation");
+      W.printHex("Offset", RelI->r_offset);
+      W.printNumber("Type", RelocName, (int)RelI->getType(Obj->isMips64EL()));
+      W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+      W.printHex("Addend", RelI->r_addend);
+    }
+    else {
+      raw_ostream& OS = W.startLine();
+      OS << W.hex(RelI->r_offset)
+        << " " << RelocName
+        << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+        << " " << W.hex(RelI->r_addend)
+        << "\n";
+    }
+  }
+  W.unindent();
+  W.startLine() << "}\n";
+}
+
 template <class ELFT>
 void ELFDumper<ELFT>::printRelocations(const Elf_Shdr *Sec) {
   switch (Sec->sh_type) {
   case ELF::SHT_REL:
-    for (typename ELFO::Elf_Rel_Iter RI = Obj->begin_rel(Sec),
-                                     RE = Obj->end_rel(Sec);
+    for (typename ELFO::Elf_Rel_Iter RI = Obj->rel_begin(Sec),
+                                     RE = Obj->rel_end(Sec);
          RI != RE; ++RI) {
       typename ELFO::Elf_Rela Rela;
       Rela.r_offset = RI->r_offset;
@@ -692,8 +732,8 @@ void ELFDumper<ELFT>::printRelocations(const Elf_Shdr *Sec) {
     }
     break;
   case ELF::SHT_RELA:
-    for (typename ELFO::Elf_Rela_Iter RI = Obj->begin_rela(Sec),
-                                      RE = Obj->end_rela(Sec);
+    for (typename ELFO::Elf_Rela_Iter RI = Obj->rela_begin(Sec),
+                                      RE = Obj->rela_end(Sec);
          RI != RE; ++RI) {
       printRelocation(Sec, *RI);
     }
@@ -710,12 +750,20 @@ void ELFDumper<ELFT>::printRelocation(const Elf_Shdr *Sec,
   std::pair<const Elf_Shdr *, const Elf_Sym *> Sym =
       Obj->getRelocationSymbol(Sec, &Rel);
   if (Sym.second && Sym.second->getType() == ELF::STT_SECTION) {
-    const Elf_Shdr *Sec = Obj->getSection(Sym.second);
-    ErrorOr<StringRef> SecName = Obj->getSectionName(Sec);
-    if (SecName)
-      TargetName = SecName.get();
+    ErrorOr<const Elf_Shdr *> Sec = Obj->getSection(Sym.second);
+    if (!error(Sec.getError())) {
+      ErrorOr<StringRef> SecName = Obj->getSectionName(*Sec);
+      if (SecName)
+        TargetName = SecName.get();
+    }
   } else if (Sym.first) {
-    TargetName = errorOrDefault(Obj->getSymbolName(Sym.first, Sym.second));
+    const Elf_Shdr *SymTable = Sym.first;
+    ErrorOr<const Elf_Shdr *> StrTableSec = Obj->getSection(SymTable->sh_link);
+    if (!error(StrTableSec.getError())) {
+      ErrorOr<StringRef> StrTableOrErr = Obj->getStringTable(*StrTableSec);
+      if (!error(StrTableOrErr.getError()))
+        TargetName = errorOrDefault(Sym.second->getName(*StrTableOrErr));
+    }
   }
 
   if (opts::ExpandRelocs) {
@@ -736,30 +784,25 @@ void ELFDumper<ELFT>::printRelocation(const Elf_Shdr *Sec,
 template<class ELFT>
 void ELFDumper<ELFT>::printSymbols() {
   ListScope Group(W, "Symbols");
-  for (typename ELFO::Elf_Sym_Iter SymI = Obj->begin_symbols(),
-                                   SymE = Obj->end_symbols();
-       SymI != SymE; ++SymI) {
-    printSymbol(SymI);
-  }
+  for (const typename ELFO::Elf_Sym &Sym : Obj->symbols())
+    printSymbol(&Sym, false);
 }
 
 template<class ELFT>
 void ELFDumper<ELFT>::printDynamicSymbols() {
   ListScope Group(W, "DynamicSymbols");
 
-  for (typename ELFO::Elf_Sym_Iter SymI = Obj->begin_dynamic_symbols(),
-                                   SymE = Obj->end_dynamic_symbols();
-       SymI != SymE; ++SymI) {
-    printSymbol(SymI);
-  }
+  for (const typename ELFO::Elf_Sym &Sym : Obj->dynamic_symbols())
+    printSymbol(&Sym, true);
 }
 
 template <class ELFT>
-void ELFDumper<ELFT>::printSymbol(typename ELFO::Elf_Sym_Iter Symbol) {
+void ELFDumper<ELFT>::printSymbol(const typename ELFO::Elf_Sym *Symbol,
+                                  bool IsDynamic) {
   unsigned SectionIndex = 0;
   StringRef SectionName;
   getSectionNameIndex(*Obj, Symbol, SectionName, SectionIndex);
-  std::string FullSymbolName = getFullSymbolName(*Obj, Symbol);
+  std::string FullSymbolName = getFullSymbolName(*Obj, Symbol, IsDynamic);
 
   DictScope D(W, "Symbol");
   W.printNumber("Name", FullSymbolName, Symbol->st_name);
@@ -987,6 +1030,9 @@ static void printValue(const ELFFile<ELFT> *O, uint64_t Type, uint64_t Value,
   case DT_FLAGS_1:
     printFlags(Value, makeArrayRef(ElfDynamicDTFlags1), OS);
     break;
+  default:
+    OS << format("0x%" PRIX64, Value);
+    break;
   }
 }
 
@@ -1056,9 +1102,9 @@ template<class ELFT>
 void ELFDumper<ELFT>::printProgramHeaders() {
   ListScope L(W, "ProgramHeaders");
 
-  for (typename ELFO::Elf_Phdr_Iter PI = Obj->begin_program_headers(),
-                                    PE = Obj->end_program_headers();
-                                    PI != PE; ++PI) {
+  for (typename ELFO::Elf_Phdr_Iter PI = Obj->program_header_begin(),
+                                    PE = Obj->program_header_end();
+       PI != PE; ++PI) {
     DictScope P(W, "ProgramHeader");
     W.printHex   ("Type",
                   getElfSegmentType(Obj->getHeader()->e_machine, PI->p_type),
@@ -1086,12 +1132,11 @@ template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() {
   }
 
   DictScope BA(W, "BuildAttributes");
-  for (ELFO::Elf_Shdr_Iter SI = Obj->begin_sections(), SE = Obj->end_sections();
-       SI != SE; ++SI) {
-    if (SI->sh_type != ELF::SHT_ARM_ATTRIBUTES)
+  for (const ELFO::Elf_Shdr &Sec : Obj->sections()) {
+    if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES)
       continue;
 
-    ErrorOr<ArrayRef<uint8_t> > Contents = Obj->getSectionContents(&(*SI));
+    ErrorOr<ArrayRef<uint8_t>> Contents = Obj->getSectionContents(&Sec);
     if (!Contents)
       continue;
 
@@ -1115,13 +1160,13 @@ template <class ELFT> class MipsGOTParser {
 public:
   typedef object::ELFFile<ELFT> ObjectFile;
   typedef typename ObjectFile::Elf_Shdr Elf_Shdr;
+  typedef typename ObjectFile::Elf_Sym Elf_Sym;
 
   MipsGOTParser(const ObjectFile *Obj, StreamWriter &W) : Obj(Obj), W(W) {}
 
   void parseGOT(const Elf_Shdr &GOTShdr);
 
 private:
-  typedef typename ObjectFile::Elf_Sym_Iter Elf_Sym_Iter;
   typedef typename ObjectFile::Elf_Addr GOTEntry;
   typedef typename ObjectFile::template ELFEntityIterator<const GOTEntry>
   GOTIter;
@@ -1135,7 +1180,7 @@ private:
   bool getGOTTags(uint64_t &LocalGotNum, uint64_t &GotSym);
   void printGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It);
   void printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt, GOTIter It,
-                           Elf_Sym_Iter Sym);
+                           const Elf_Sym *Sym, bool IsDynamic);
 };
 }
 
@@ -1161,8 +1206,8 @@ void MipsGOTParser<ELFT>::parseGOT(const Elf_Shdr &GOTShdr) {
     return;
   }
 
-  Elf_Sym_Iter DynSymBegin = Obj->begin_dynamic_symbols();
-  Elf_Sym_Iter DynSymEnd = Obj->end_dynamic_symbols();
+  const Elf_Sym *DynSymBegin = Obj->dynamic_symbol_begin();
+  const Elf_Sym *DynSymEnd = Obj->dynamic_symbol_end();
   std::size_t DynSymTotal = std::size_t(std::distance(DynSymBegin, DynSymEnd));
 
   if (DtGotSym > DynSymTotal) {
@@ -1210,10 +1255,10 @@ void MipsGOTParser<ELFT>::parseGOT(const Elf_Shdr &GOTShdr) {
     ListScope GS(W, "Global entries");
 
     GOTIter GotGlobalEnd = makeGOTIter(*GOT, DtLocalGotNum + GlobalGotNum);
-    Elf_Sym_Iter GotDynSym = DynSymBegin + DtGotSym;
+    const Elf_Sym *GotDynSym = DynSymBegin + DtGotSym;
     for (; It != GotGlobalEnd; ++It) {
       DictScope D(W, "Entry");
-      printGlobalGotEntry(GOTShdr.sh_addr, GotBegin, It, GotDynSym++);
+      printGlobalGotEntry(GOTShdr.sh_addr, GotBegin, It, GotDynSym++, true);
     }
   }
 
@@ -1274,7 +1319,8 @@ void MipsGOTParser<ELFT>::printGotEntry(uint64_t GotAddr, GOTIter BeginIt,
 
 template <class ELFT>
 void MipsGOTParser<ELFT>::printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt,
-                                              GOTIter It, Elf_Sym_Iter Sym) {
+                                              GOTIter It, const Elf_Sym *Sym,
+                                              bool IsDynamic) {
   printGotEntry(GotAddr, BeginIt, It);
 
   W.printHex("Value", Sym->st_value);
@@ -1285,7 +1331,7 @@ void MipsGOTParser<ELFT>::printGlobalGotEntry(uint64_t GotAddr, GOTIter BeginIt,
   getSectionNameIndex(*Obj, Sym, SectionName, SectionIndex);
   W.printHex("Section", SectionName, SectionIndex);
 
-  std::string FullSymbolName = getFullSymbolName(*Obj, Sym);
+  std::string FullSymbolName = getFullSymbolName(*Obj, Sym, IsDynamic);
   W.printNumber("Name", FullSymbolName, Sym->st_name);
 }
 
@@ -1452,3 +1498,25 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
   W.printHex("Co-Proc Mask2", Reginfo->ri_cprmask[2]);
   W.printHex("Co-Proc Mask3", Reginfo->ri_cprmask[3]);
 }
+
+template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
+  const typename ELFFile<ELFT>::Elf_Shdr *StackMapSection = nullptr;
+  for (const auto &Sec : Obj->sections()) {
+    ErrorOr<StringRef> Name = Obj->getSectionName(&Sec);
+    if (*Name == ".llvm_stackmaps") {
+      StackMapSection = &Sec;
+      break;
+    }
+  }
+
+  if (!StackMapSection)
+    return;
+
+  StringRef StackMapContents;
+  ErrorOr<ArrayRef<uint8_t>> StackMapContentsArray =
+    Obj->getSectionContents(StackMapSection);
+
+  prettyPrintStackMap(
+              llvm::outs(),
+              StackMapV1Parser<ELFT::TargetEndianness>(*StackMapContentsArray));
+}
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
index aeb563a..adb99b0 100644
--- a/tools/llvm-readobj/MachODumper.cpp
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -14,6 +14,7 @@
 #include "llvm-readobj.h"
 #include "Error.h"
 #include "ObjDumper.h"
+#include "StackMapPrinter.h"
 #include "StreamWriter.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
@@ -37,6 +38,7 @@ public:
   void printSymbols() override;
   void printDynamicSymbols() override;
   void printUnwindInfo() override;
+  void printStackMap() const override;
 
 private:
   template<class MachHeader>
@@ -459,12 +461,9 @@ void MachODumper::printRelocation(const RelocationRef &Reloc) {
 
 void MachODumper::printRelocation(const MachOObjectFile *Obj,
                                   const RelocationRef &Reloc) {
-  uint64_t Offset;
+  uint64_t Offset = Reloc.getOffset();
   SmallString<32> RelocName;
-  if (error(Reloc.getOffset(Offset)))
-    return;
-  if (error(Reloc.getTypeName(RelocName)))
-    return;
+  Reloc.getTypeName(RelocName);
 
   DataRefImpl DR = Reloc.getRawDataRefImpl();
   MachO::any_relocation_info RE = Obj->getRelocation(DR);
@@ -475,8 +474,10 @@ void MachODumper::printRelocation(const MachOObjectFile *Obj,
   if (IsExtern) {
     symbol_iterator Symbol = Reloc.getSymbol();
     if (Symbol != Obj->symbol_end()) {
-      if (error(Symbol->getName(TargetName)))
+      ErrorOr<StringRef> TargetNameOrErr = Symbol->getName();
+      if (error(TargetNameOrErr.getError()))
         return;
+      TargetName = *TargetNameOrErr;
     }
   } else if (!IsScattered) {
     section_iterator SecI = Obj->getRelocationSection(DR);
@@ -539,8 +540,8 @@ void MachODumper::printDynamicSymbols() {
 
 void MachODumper::printSymbol(const SymbolRef &Symbol) {
   StringRef SymbolName;
-  if (Symbol.getName(SymbolName))
-    SymbolName = "";
+  if (ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName())
+    SymbolName = *SymbolNameOrErr;
 
   MachOSymbol MOSymbol;
   getSymbol(Obj, Symbol.getRawDataRefImpl(), MOSymbol);
@@ -573,3 +574,32 @@ void MachODumper::printSymbol(const SymbolRef &Symbol) {
 void MachODumper::printUnwindInfo() {
   W.startLine() << "UnwindInfo not implemented.\n";
 }
+
+void MachODumper::printStackMap() const {
+  object::SectionRef StackMapSection;
+  for (auto Sec : Obj->sections()) {
+    StringRef Name;
+    Sec.getName(Name);
+    if (Name == "__llvm_stackmaps") {
+      StackMapSection = Sec;
+      break;
+    }
+  }
+
+  if (StackMapSection == object::SectionRef())
+    return;
+
+  StringRef StackMapContents;
+  StackMapSection.getContents(StackMapContents);
+  ArrayRef<uint8_t> StackMapContentsArray(
+      reinterpret_cast<const uint8_t*>(StackMapContents.data()),
+      StackMapContents.size());
+
+  if (Obj->isLittleEndian())
+     prettyPrintStackMap(
+                      llvm::outs(),
+                      StackMapV1Parser<support::little>(StackMapContentsArray));
+  else
+     prettyPrintStackMap(llvm::outs(),
+                         StackMapV1Parser<support::big>(StackMapContentsArray));
+}
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
index 323f5e3..27e15b2 100644
--- a/tools/llvm-readobj/ObjDumper.h
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -33,6 +33,7 @@ public:
   virtual void printUnwindInfo() = 0;
 
   // Only implemented for ELF at this time.
+  virtual void printDynamicRelocations() { }
   virtual void printDynamicTable() { }
   virtual void printNeededLibraries() { }
   virtual void printProgramHeaders() { }
@@ -51,6 +52,8 @@ public:
   virtual void printCOFFDirectives() { }
   virtual void printCOFFBaseReloc() { }
 
+  virtual void printStackMap() const = 0;
+
 protected:
   StreamWriter& W;
 };
diff --git a/tools/llvm-readobj/StackMapPrinter.h b/tools/llvm-readobj/StackMapPrinter.h
new file mode 100644
index 0000000..92645bc
--- /dev/null
+++ b/tools/llvm-readobj/StackMapPrinter.h
@@ -0,0 +1,80 @@
+//===-------- StackMapPrinter.h - Pretty-print stackmaps --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_READOBJ_STACKMAPPRINTER_H
+#define LLVM_TOOLS_LLVM_READOBJ_STACKMAPPRINTER_H
+
+#include "llvm/Object/StackMapParser.h"
+
+namespace llvm {
+
+// Pretty print a stackmap to the given ostream.
+template <typename OStreamT, typename StackMapParserT>
+void prettyPrintStackMap(OStreamT &OS, const StackMapParserT &SMP) {
+
+  OS << "LLVM StackMap Version: " << SMP.getVersion()
+     << "\nNum Functions: " << SMP.getNumFunctions();
+
+  // Functions:
+  for (const auto &F : SMP.functions())
+    OS << "\n  Function address: " << F.getFunctionAddress()
+       << ", stack size: " << F.getStackSize();
+
+  // Constants:
+  OS << "\nNum Constants: " << SMP.getNumConstants();
+  unsigned ConstantIndex = 0;
+  for (const auto &C : SMP.constants())
+    OS << "\n  #" << ++ConstantIndex << ": " << C.getValue();
+
+  // Records:
+  OS << "\nNum Records: " << SMP.getNumRecords();
+  for (const auto &R : SMP.records()) {
+    OS << "\n  Record ID: " << R.getID()
+       << ", instruction offset: " << R.getInstructionOffset()
+       << "\n    " << R.getNumLocations() << " locations:";
+
+    unsigned LocationIndex = 0;
+    for (const auto &Loc : R.locations()) {
+      OS << "\n      #" << ++LocationIndex << ": ";
+      switch (Loc.getKind()) {
+      case StackMapParserT::LocationKind::Register:
+        OS << "Register R#" << Loc.getDwarfRegNum();
+        break;
+      case StackMapParserT::LocationKind::Direct:
+        OS << "Direct R#" << Loc.getDwarfRegNum() << " + "
+           << Loc.getOffset();
+        break;
+      case StackMapParserT::LocationKind::Indirect:
+        OS << "Indirect [R#" << Loc.getDwarfRegNum() << " + "
+           << Loc.getOffset() << "]";
+        break;
+      case StackMapParserT::LocationKind::Constant:
+        OS << "Constant " << Loc.getSmallConstant();
+        break;
+      case StackMapParserT::LocationKind::ConstantIndex:
+        OS << "ConstantIndex #" << Loc.getConstantIndex() << " ("
+           << SMP.getConstant(Loc.getConstantIndex()).getValue() << ")";
+        break;
+      }
+    }
+
+    OS << "\n    " << R.getNumLiveOuts() << " live-outs: [ ";
+    for (const auto &LO : R.liveouts())
+      OS << "R#" << LO.getDwarfRegNum() << " ("
+         << LO.getSizeInBytes() << "-bytes) ";
+    OS << "]\n";
+  }
+
+ OS << "\n";
+
+}
+
+}
+
+#endif
diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp
index b148c5d..5a8af41 100644
--- a/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/tools/llvm-readobj/Win64EHDumper.cpp
@@ -118,19 +118,19 @@ static std::string formatSymbol(const Dumper::Context &Ctx,
   std::string Buffer;
   raw_string_ostream OS(Buffer);
 
-  StringRef Name;
   SymbolRef Symbol;
-  if (Ctx.ResolveSymbol(Section, Offset, Symbol, Ctx.UserData) ||
-      Symbol.getName(Name)) {
-    OS << format(" (0x%" PRIX64 ")", Offset);
-    return OS.str();
+  if (!Ctx.ResolveSymbol(Section, Offset, Symbol, Ctx.UserData)) {
+    if (ErrorOr<StringRef> Name = Symbol.getName()) {
+      OS << *Name;
+      if (Displacement > 0)
+        OS << format(" +0x%X (0x%" PRIX64 ")", Displacement, Offset);
+      else
+        OS << format(" (0x%" PRIX64 ")", Offset);
+      return OS.str();
+    }
   }
 
-  OS << Name;
-  if (Displacement > 0)
-    OS << format(" +0x%X (0x%" PRIX64 ")", Displacement, Offset);
-  else
-    OS << format(" (0x%" PRIX64 ")", Offset);
+  OS << format(" (0x%" PRIX64 ")", Offset);
   return OS.str();
 }
 
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index f960796..c5bccf9 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -40,7 +40,6 @@
 #include <string>
 #include <system_error>
 
-
 using namespace llvm;
 using namespace llvm::object;
 
@@ -91,6 +90,10 @@ namespace opts {
     cl::desc("Alias for --relocations"),
     cl::aliasopt(Relocations));
 
+  // -dyn-relocations
+  cl::opt<bool> DynRelocs("dyn-relocations",
+    cl::desc("Display the dynamic relocation entries in the file"));
+
   // -symbols, -t
   cl::opt<bool> Symbols("symbols",
     cl::desc("Display the symbol table"));
@@ -173,6 +176,12 @@ namespace opts {
   cl::opt<bool>
   COFFBaseRelocs("coff-basereloc",
                  cl::desc("Display the PE/COFF .reloc section"));
+
+  // -stackmap
+  cl::opt<bool>
+  PrintStackMap("stackmap",
+                cl::desc("Display contents of stackmap section"));
+
 } // namespace opts
 
 static int ReturnValue = EXIT_SUCCESS;
@@ -190,9 +199,8 @@ bool error(std::error_code EC) {
 }
 
 bool relocAddressLess(RelocationRef a, RelocationRef b) {
-  uint64_t a_addr, b_addr;
-  if (error(a.getOffset(a_addr))) exit(ReturnValue);
-  if (error(b.getOffset(b_addr))) exit(ReturnValue);
+  uint64_t a_addr = a.getOffset();
+  uint64_t b_addr = b.getOffset();
   return a_addr < b_addr;
 }
 
@@ -280,6 +288,8 @@ static void dumpObject(const ObjectFile *Obj) {
     Dumper->printSections();
   if (opts::Relocations)
     Dumper->printRelocations();
+  if (opts::DynRelocs)
+    Dumper->printDynamicRelocations();
   if (opts::Symbols)
     Dumper->printSymbols();
   if (opts::DynamicSymbols)
@@ -311,8 +321,10 @@ static void dumpObject(const ObjectFile *Obj) {
     Dumper->printCOFFDirectives();
   if (opts::COFFBaseRelocs)
     Dumper->printCOFFBaseReloc();
-}
 
+  if (opts::PrintStackMap)
+    Dumper->printStackMap();
+}
 
 /// @brief Dumps each object file in \a Arc;
 static void dumpArchive(const Archive *Arc) {
@@ -374,15 +386,11 @@ static void dumpInput(StringRef File) {
     reportError(File, readobj_error::unrecognized_file_format);
 }
 
-
 int main(int argc, const char *argv[]) {
   sys::PrintStackTraceOnErrorSignal();
   PrettyStackTraceProgram X(argc, argv);
   llvm_shutdown_obj Y;
 
-  // Initialize targets.
-  llvm::InitializeAllTargetInfos();
-
   // Register the target printer for --version.
   cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
 
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index f857b2e..98c6f5c 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -25,6 +25,7 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Object/SymbolSize.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/ManagedStatic.h"
@@ -81,6 +82,12 @@ Dylibs("dylib",
 static cl::opt<std::string>
 TripleName("triple", cl::desc("Target triple for disassembler"));
 
+static cl::opt<std::string>
+MCPU("mcpu",
+     cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+     cl::value_desc("cpu-name"),
+     cl::init(""));
+
 static cl::list<std::string>
 CheckFiles("check",
            cl::desc("File containing RuntimeDyld verifier checks."),
@@ -252,63 +259,21 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
     std::unique_ptr<DIContext> Context(
       new DWARFContextInMemory(*SymbolObj,LoadedObjInfo.get()));
 
-    // FIXME: This is generally useful. Figure out a place in lib/Object to
-    // put utility functions.
-    std::map<object::SectionRef, std::vector<uint64_t>> FuncAddresses;
-    if (!isa<ELFObjectFileBase>(SymbolObj)) {
-      for (object::SymbolRef Sym : SymbolObj->symbols()) {
-        object::SymbolRef::Type SymType;
-        if (Sym.getType(SymType))
-          continue;
-        if (SymType != object::SymbolRef::ST_Function)
-          continue;
-        uint64_t Addr;
-        if (Sym.getAddress(Addr))
-          continue;
-        object::section_iterator Sec = SymbolObj->section_end();
-        if (Sym.getSection(Sec))
-          continue;
-        std::vector<uint64_t> &Addrs = FuncAddresses[*Sec];
-        if (Addrs.empty()) {
-          uint64_t SecAddr = Sec->getAddress();
-          uint64_t SecSize = Sec->getSize();
-          Addrs.push_back(SecAddr + SecSize);
-        }
-        Addrs.push_back(Addr);
-      }
-      for (auto &Pair : FuncAddresses) {
-        std::vector<uint64_t> &Addrs = Pair.second;
-        array_pod_sort(Addrs.begin(), Addrs.end());
-      }
-    }
+    std::vector<std::pair<SymbolRef, uint64_t>> SymAddr =
+        object::computeSymbolSizes(*SymbolObj);
 
     // Use symbol info to iterate functions in the object.
-    for (object::SymbolRef Sym : SymbolObj->symbols()) {
-      object::SymbolRef::Type SymType;
-      if (Sym.getType(SymType))
-        continue;
-      if (SymType == object::SymbolRef::ST_Function) {
-        StringRef  Name;
-        uint64_t   Addr;
-        if (Sym.getName(Name))
+    for (const auto &P : SymAddr) {
+      object::SymbolRef Sym = P.first;
+      if (Sym.getType() == object::SymbolRef::ST_Function) {
+        ErrorOr<StringRef> Name = Sym.getName();
+        if (!Name)
           continue;
+        uint64_t Addr;
         if (Sym.getAddress(Addr))
           continue;
 
-        uint64_t Size;
-        if (isa<ELFObjectFileBase>(SymbolObj)) {
-          Size = Sym.getSize();
-        } else {
-          object::section_iterator Sec = SymbolObj->section_end();
-          if (Sym.getSection(Sec))
-            continue;
-          const std::vector<uint64_t> &Addrs = FuncAddresses[*Sec];
-          auto AddrI = std::find(Addrs.begin(), Addrs.end(), Addr);
-          assert(AddrI != Addrs.end() && (AddrI + 1) != Addrs.end());
-          assert(*AddrI == Addr);
-          Size = *(AddrI + 1) - Addr;
-        }
-
+        uint64_t Size = P.second;
         // If we're not using the debug object, compute the address of the
         // symbol in memory (rather than that in the unrelocated object file)
         // and use that to query the DWARFContext.
@@ -323,7 +288,8 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
             Addr += SectionLoadAddress - Sec->getAddress();
         }
 
-        outs() << "Function: " << Name << ", Size = " << Size << ", Addr = " << Addr << "\n";
+        outs() << "Function: " << *Name << ", Size = " << Size
+               << ", Addr = " << Addr << "\n";
 
         DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
         DILineInfoTable::iterator  Begin = Lines.begin();
@@ -575,7 +541,7 @@ static int linkAndVerify() {
   TripleName = TheTriple.getTriple();
 
   std::unique_ptr<MCSubtargetInfo> STI(
-    TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+    TheTarget->createMCSubtargetInfo(TripleName, MCPU, ""));
   assert(STI && "Unable to create subtarget info!");
 
   std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index f5e718b..727d03f 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -96,24 +96,21 @@ private:
 
 /// Generate an empty function with a default argument list.
 Function *GenEmptyFunction(Module *M) {
-  // Type Definitions
-  std::vector<Type*> ArgsTy;
   // Define a few arguments
   LLVMContext &Context = M->getContext();
-  ArgsTy.push_back(PointerType::get(IntegerType::getInt8Ty(Context), 0));
-  ArgsTy.push_back(PointerType::get(IntegerType::getInt32Ty(Context), 0));
-  ArgsTy.push_back(PointerType::get(IntegerType::getInt64Ty(Context), 0));
-  ArgsTy.push_back(IntegerType::getInt32Ty(Context));
-  ArgsTy.push_back(IntegerType::getInt64Ty(Context));
-  ArgsTy.push_back(IntegerType::getInt8Ty(Context));
-
-  FunctionType *FuncTy = FunctionType::get(Type::getVoidTy(Context), ArgsTy, 0);
+  Type* ArgsTy[] = {
+    Type::getInt8PtrTy(Context),
+    Type::getInt32PtrTy(Context),
+    Type::getInt64PtrTy(Context),
+    Type::getInt32Ty(Context),
+    Type::getInt64Ty(Context),
+    Type::getInt8Ty(Context)
+  };
+
+  auto *FuncTy = FunctionType::get(Type::getVoidTy(Context), ArgsTy, false);
   // Pick a unique name to describe the input parameters
-  std::stringstream ss;
-  ss<<"autogen_SD"<<SeedCL;
-  Function *Func = Function::Create(FuncTy, GlobalValue::ExternalLinkage,
-                                    ss.str(), M);
-
+  Twine Name = "autogen_SD" + Twine{SeedCL};
+  auto *Func = Function::Create(FuncTy, GlobalValue::ExternalLinkage, Name, M);
   Func->setCallingConv(CallingConv::C);
   return Func;
 }
@@ -620,59 +617,45 @@ static void FillFunction(Function *F, Random &R) {
   Modifier::PieceTable PT;
 
   // Consider arguments as legal values.
-  for (Function::arg_iterator it = F->arg_begin(), e = F->arg_end();
-       it != e; ++it)
-    PT.push_back(it);
+  for (auto &arg : F->args())
+    PT.push_back(&arg);
 
   // List of modifiers which add new random instructions.
-  std::vector<Modifier*> Modifiers;
-  std::unique_ptr<Modifier> LM(new LoadModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> SM(new StoreModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> EE(new ExtractElementModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> SHM(new ShuffModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> IE(new InsertElementModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> BM(new BinModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> CM(new CastModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> SLM(new SelectModifier(BB, &PT, &R));
-  std::unique_ptr<Modifier> PM(new CmpModifier(BB, &PT, &R));
-  Modifiers.push_back(LM.get());
-  Modifiers.push_back(SM.get());
-  Modifiers.push_back(EE.get());
-  Modifiers.push_back(SHM.get());
-  Modifiers.push_back(IE.get());
-  Modifiers.push_back(BM.get());
-  Modifiers.push_back(CM.get());
-  Modifiers.push_back(SLM.get());
-  Modifiers.push_back(PM.get());
+  std::vector<std::unique_ptr<Modifier>> Modifiers;
+  Modifiers.emplace_back(new LoadModifier(BB, &PT, &R));
+  Modifiers.emplace_back(new StoreModifier(BB, &PT, &R));
+  auto SM = Modifiers.back().get();
+  Modifiers.emplace_back(new ExtractElementModifier(BB, &PT, &R));
+  Modifiers.emplace_back(new ShuffModifier(BB, &PT, &R));
+  Modifiers.emplace_back(new InsertElementModifier(BB, &PT, &R));
+  Modifiers.emplace_back(new BinModifier(BB, &PT, &R));
+  Modifiers.emplace_back(new CastModifier(BB, &PT, &R));
+  Modifiers.emplace_back(new SelectModifier(BB, &PT, &R));
+  Modifiers.emplace_back(new CmpModifier(BB, &PT, &R));
 
   // Generate the random instructions
-  AllocaModifier AM(BB, &PT, &R); AM.ActN(5); // Throw in a few allocas
-  ConstModifier COM(BB, &PT, &R);  COM.ActN(40); // Throw in a few constants
+  AllocaModifier{BB, &PT, &R}.ActN(5); // Throw in a few allocas
+  ConstModifier{BB, &PT, &R}.ActN(40); // Throw in a few constants
 
-  for (unsigned i=0; i< SizeCL / Modifiers.size(); ++i)
-    for (std::vector<Modifier*>::iterator it = Modifiers.begin(),
-         e = Modifiers.end(); it != e; ++it) {
-      (*it)->Act();
-    }
+  for (unsigned i = 0; i < SizeCL / Modifiers.size(); ++i)
+    for (auto &Mod : Modifiers)
+      Mod->Act();
 
   SM->ActN(5); // Throw in a few stores.
 }
 
 static void IntroduceControlFlow(Function *F, Random &R) {
   std::vector<Instruction*> BoolInst;
-  for (BasicBlock::iterator it = F->begin()->begin(),
-       e = F->begin()->end(); it != e; ++it) {
-    if (it->getType() == IntegerType::getInt1Ty(F->getContext()))
-      BoolInst.push_back(it);
+  for (auto &Instr : F->front()) {
+    if (Instr.getType() == IntegerType::getInt1Ty(F->getContext()))
+      BoolInst.push_back(&Instr);
   }
 
   std::random_shuffle(BoolInst.begin(), BoolInst.end(), R);
 
-  for (std::vector<Instruction*>::iterator it = BoolInst.begin(),
-       e = BoolInst.end(); it != e; ++it) {
-    Instruction *Instr = *it;
+  for (auto *Instr : BoolInst) {
     BasicBlock *Curr = Instr->getParent();
-    BasicBlock::iterator Loc= Instr;
+    BasicBlock::iterator Loc = Instr;
     BasicBlock *Next = Curr->splitBasicBlock(Loc, "CF");
     Instr->moveBefore(Curr->getTerminator());
     if (Curr != &F->getEntryBlock()) {
@@ -688,7 +671,7 @@ int main(int argc, char **argv) {
   cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n");
   llvm_shutdown_obj Y;
 
-  std::unique_ptr<Module> M(new Module("/tmp/autogen.bc", getGlobalContext()));
+  auto M = make_unique<Module>("/tmp/autogen.bc", getGlobalContext());
   Function *F = GenEmptyFunction(M.get());
 
   // Pick an initial seed value
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp
index b8fa838..ec3fe486 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -19,6 +19,7 @@
 #include "llvm/DebugInfo/PDB/PDBContext.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Object/SymbolSize.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
@@ -32,6 +33,7 @@
 #if defined(_MSC_VER)
 #include <Windows.h>
 #include <DbgHelp.h>
+#pragma comment(lib, "dbghelp.lib")
 #endif
 
 namespace llvm {
@@ -71,30 +73,20 @@ ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
       }
     }
   }
-  for (const SymbolRef &Symbol : Module->symbols()) {
-    addSymbol(Symbol, OpdExtractor.get(), OpdAddress);
-  }
-  bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end());
-  if (NoSymbolTable && Module->isELF()) {
-    // Fallback to dynamic symbol table, if regular symbol table is stripped.
-    std::pair<symbol_iterator, symbol_iterator> IDyn =
-        getELFDynamicSymbolIterators(Module);
-    for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) {
-      addSymbol(*si, OpdExtractor.get(), OpdAddress);
-    }
-  }
+  std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
+      computeSymbolSizes(*Module);
+  for (auto &P : Symbols)
+    addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
 }
 
-void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
-                           uint64_t OpdAddress) {
-  SymbolRef::Type SymbolType;
-  if (error(Symbol.getType(SymbolType)))
-    return;
+void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize,
+                           DataExtractor *OpdExtractor, uint64_t OpdAddress) {
+  SymbolRef::Type SymbolType = Symbol.getType();
   if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
     return;
   uint64_t SymbolAddress;
   if (error(Symbol.getAddress(SymbolAddress)) ||
-      SymbolAddress == UnknownAddressOrSize)
+      SymbolAddress == UnknownAddress)
     return;
   if (OpdExtractor) {
     // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
@@ -108,19 +100,10 @@ void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
         OpdExtractor->isValidOffsetForAddress(OpdOffset32))
       SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
   }
-  uint64_t SymbolSize;
-  // Getting symbol size is linear for Mach-O files, so assume that symbol
-  // occupies the memory range up to the following symbol.
-  if (isa<MachOObjectFile>(Module))
-    SymbolSize = 0;
-  else {
-    SymbolSize = Symbol.getSize();
-    if (SymbolSize == UnknownAddressOrSize)
-      return;
-  }
-  StringRef SymbolName;
-  if (error(Symbol.getName(SymbolName)))
+  ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName();
+  if (error(SymbolNameOrErr.getError()))
     return;
+  StringRef SymbolName = *SymbolNameOrErr;
   // Mach-O symbol table names have leading underscore, skip it.
   if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
     SymbolName = SymbolName.drop_front();
@@ -436,7 +419,7 @@ LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin,
     if (I != ObjectFileForArch.end())
       return I->second;
     ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj =
-        UB->getObjectForArch(Triple(ArchName).getArch());
+        UB->getObjectForArch(ArchName);
     if (ParsedObj) {
       Res = ParsedObj.get().get();
       ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get()));
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h
index 1c2006f..be246c3 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.h
+++ b/tools/llvm-symbolizer/LLVMSymbolize.h
@@ -119,7 +119,7 @@ private:
                               uint64_t &Size) const;
   // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
   // (function descriptor) section and OpdExtractor refers to its contents.
-  void addSymbol(const SymbolRef &Symbol,
+  void addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize,
                  DataExtractor *OpdExtractor = nullptr,
                  uint64_t OpdAddress = 0);
   ObjectFile *Module;
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index e55708c..5c712f1 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -223,20 +223,8 @@ lto_symbol_attributes lto_module_get_symbol_attribute(lto_module_t mod,
   return unwrap(mod)->getSymbolAttributes(index);
 }
 
-unsigned int lto_module_get_num_deplibs(lto_module_t mod) {
-  return unwrap(mod)->getDependentLibraryCount();
-}
-
-const char* lto_module_get_deplib(lto_module_t mod, unsigned int index) {
-  return unwrap(mod)->getDependentLibrary(index);
-}
-
-unsigned int lto_module_get_num_linkeropts(lto_module_t mod) {
-  return unwrap(mod)->getLinkerOptCount();
-}
-
-const char* lto_module_get_linkeropt(lto_module_t mod, unsigned int index) {
-  return unwrap(mod)->getLinkerOpt(index);
+const char* lto_module_get_linkeropts(lto_module_t mod) {
+  return unwrap(mod)->getLinkerOpts();
 }
 
 void lto_codegen_set_diagnostic_handler(lto_code_gen_t cg,
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index 9145a6f..8bc2b0f 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -8,10 +8,7 @@ lto_module_create_from_memory
 lto_module_create_from_memory_with_path
 lto_module_create_in_local_context
 lto_module_create_in_codegen_context
-lto_module_get_deplib
-lto_module_get_linkeropt
-lto_module_get_num_deplibs
-lto_module_get_num_linkeropts
+lto_module_get_linkeropts
 lto_module_get_num_symbols
 lto_module_get_symbol_attribute
 lto_module_get_symbol_name
@@ -42,6 +39,7 @@ lto_codegen_compile_to_file
 lto_codegen_optimize
 lto_codegen_compile_optimized
 lto_codegen_set_should_internalize
+lto_codegen_set_should_embed_uselists
 LLVMCreateDisasm
 LLVMCreateDisasmCPU
 LLVMDisasmDispose
diff --git a/tools/obj2yaml/coff2yaml.cpp b/tools/obj2yaml/coff2yaml.cpp
index 1e29107..f675bfe 100644
--- a/tools/obj2yaml/coff2yaml.cpp
+++ b/tools/obj2yaml/coff2yaml.cpp
@@ -120,7 +120,10 @@ void COFFDumper::dumpSections(unsigned NumSections) {
       const object::coff_relocation *reloc = Obj.getCOFFRelocation(Reloc);
       COFFYAML::Relocation Rel;
       object::symbol_iterator Sym = Reloc.getSymbol();
-      Sym->getName(Rel.SymbolName);
+      ErrorOr<StringRef> SymbolNameOrErr = Sym->getName();
+      if (std::error_code EC = SymbolNameOrErr.getError())
+        report_fatal_error(EC.message());
+      Rel.SymbolName = *SymbolNameOrErr;
       Rel.VirtualAddress = reloc->VirtualAddress;
       Rel.Type = reloc->Type;
       Relocations.push_back(Rel);
diff --git a/tools/obj2yaml/elf2yaml.cpp b/tools/obj2yaml/elf2yaml.cpp
index eeabb0f..9afcede 100644
--- a/tools/obj2yaml/elf2yaml.cpp
+++ b/tools/obj2yaml/elf2yaml.cpp
@@ -23,12 +23,12 @@ template <class ELFT>
 class ELFDumper {
   typedef object::Elf_Sym_Impl<ELFT> Elf_Sym;
   typedef typename object::ELFFile<ELFT>::Elf_Shdr Elf_Shdr;
-  typedef typename object::ELFFile<ELFT>::Elf_Sym_Iter Elf_Sym_Iter;
   typedef typename object::ELFFile<ELFT>::Elf_Word Elf_Word;
 
   const object::ELFFile<ELFT> &Obj;
 
-  std::error_code dumpSymbol(Elf_Sym_Iter Sym, ELFYAML::Symbol &S);
+  std::error_code dumpSymbol(const Elf_Sym *Sym, bool IsDynamic,
+                             ELFYAML::Symbol &S);
   std::error_code dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S);
   std::error_code dumpCommonRelocationSection(const Elf_Shdr *Shdr,
                                               ELFYAML::RelocationSection &S);
@@ -115,14 +115,14 @@ ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
 
   // Dump symbols
   bool IsFirstSym = true;
-  for (auto SI = Obj.begin_symbols(), SE = Obj.end_symbols(); SI != SE; ++SI) {
+  for (auto SI = Obj.symbol_begin(), SE = Obj.symbol_end(); SI != SE; ++SI) {
     if (IsFirstSym) {
       IsFirstSym = false;
       continue;
     }
 
     ELFYAML::Symbol S;
-    if (std::error_code EC = ELFDumper<ELFT>::dumpSymbol(SI, S))
+    if (std::error_code EC = ELFDumper<ELFT>::dumpSymbol(SI, false, S))
       return EC;
 
     switch (SI->getBinding())
@@ -145,19 +145,22 @@ ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
 }
 
 template <class ELFT>
-std::error_code ELFDumper<ELFT>::dumpSymbol(Elf_Sym_Iter Sym,
+std::error_code ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, bool IsDynamic,
                                             ELFYAML::Symbol &S) {
   S.Type = Sym->getType();
   S.Value = Sym->st_value;
   S.Size = Sym->st_size;
   S.Other = Sym->st_other;
 
-  ErrorOr<StringRef> NameOrErr = Obj.getSymbolName(Sym);
+  ErrorOr<StringRef> NameOrErr = Obj.getSymbolName(Sym, IsDynamic);
   if (std::error_code EC = NameOrErr.getError())
     return EC;
   S.Name = NameOrErr.get();
 
-  const Elf_Shdr *Shdr = Obj.getSection(&*Sym);
+  ErrorOr<const Elf_Shdr *> ShdrOrErr = Obj.getSection(&*Sym);
+  if (std::error_code EC = ShdrOrErr.getError())
+    return EC;
+  const Elf_Shdr *Shdr = *ShdrOrErr;
   if (!Shdr)
     return obj2yaml_error::success;
 
@@ -182,8 +185,16 @@ std::error_code ELFDumper<ELFT>::dumpRelocation(const Elf_Shdr *Shdr,
   if (!NamePair.first)
     return obj2yaml_error::success;
 
-  ErrorOr<StringRef> NameOrErr =
-      Obj.getSymbolName(NamePair.first, NamePair.second);
+  const Elf_Shdr *SymTab = NamePair.first;
+  ErrorOr<const Elf_Shdr *> StrTabSec = Obj.getSection(SymTab->sh_link);
+  if (std::error_code EC = StrTabSec.getError())
+    return EC;
+  ErrorOr<StringRef> StrTabOrErr = Obj.getStringTable(*StrTabSec);
+  if (std::error_code EC = StrTabOrErr.getError())
+    return EC;
+  StringRef StrTab = *StrTabOrErr;
+
+  ErrorOr<StringRef> NameOrErr = NamePair.second->getName(StrTab);
   if (std::error_code EC = NameOrErr.getError())
     return EC;
   R.Symbol = NameOrErr.get();
@@ -205,12 +216,13 @@ std::error_code ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
   S.Name = NameOrErr.get();
 
   if (Shdr->sh_link != ELF::SHN_UNDEF) {
-    if (const Elf_Shdr *LinkSection = Obj.getSection(Shdr->sh_link)) {
-      NameOrErr = Obj.getSectionName(LinkSection);
-      if (std::error_code EC = NameOrErr.getError())
-        return EC;
-      S.Link = NameOrErr.get();
-    }
+    ErrorOr<const Elf_Shdr *> LinkSection = Obj.getSection(Shdr->sh_link);
+    if (std::error_code EC = LinkSection.getError())
+      return EC;
+    NameOrErr = Obj.getSectionName(*LinkSection);
+    if (std::error_code EC = NameOrErr.getError())
+      return EC;
+    S.Link = NameOrErr.get();
   }
 
   return obj2yaml_error::success;
@@ -223,12 +235,14 @@ ELFDumper<ELFT>::dumpCommonRelocationSection(const Elf_Shdr *Shdr,
   if (std::error_code EC = dumpCommonSection(Shdr, S))
     return EC;
 
-  if (const Elf_Shdr *InfoSection = Obj.getSection(Shdr->sh_info)) {
-    ErrorOr<StringRef> NameOrErr = Obj.getSectionName(InfoSection);
-    if (std::error_code EC = NameOrErr.getError())
-      return EC;
-    S.Info = NameOrErr.get();
-  }
+  ErrorOr<const Elf_Shdr *> InfoSection = Obj.getSection(Shdr->sh_info);
+  if (std::error_code EC = InfoSection.getError())
+    return EC;
+
+  ErrorOr<StringRef> NameOrErr = Obj.getSectionName(*InfoSection);
+  if (std::error_code EC = NameOrErr.getError())
+    return EC;
+  S.Info = NameOrErr.get();
 
   return obj2yaml_error::success;
 }
@@ -242,8 +256,7 @@ ELFDumper<ELFT>::dumpRelSection(const Elf_Shdr *Shdr) {
   if (std::error_code EC = dumpCommonRelocationSection(Shdr, *S))
     return EC;
 
-  for (auto RI = Obj.begin_rel(Shdr), RE = Obj.end_rel(Shdr); RI != RE;
-       ++RI) {
+  for (auto RI = Obj.rel_begin(Shdr), RE = Obj.rel_end(Shdr); RI != RE; ++RI) {
     ELFYAML::Relocation R;
     if (std::error_code EC = dumpRelocation(Shdr, &*RI, R))
       return EC;
@@ -262,7 +275,7 @@ ELFDumper<ELFT>::dumpRelaSection(const Elf_Shdr *Shdr) {
   if (std::error_code EC = dumpCommonRelocationSection(Shdr, *S))
     return EC;
 
-  for (auto RI = Obj.begin_rela(Shdr), RE = Obj.end_rela(Shdr); RI != RE;
+  for (auto RI = Obj.rela_begin(Shdr), RE = Obj.rela_end(Shdr); RI != RE;
        ++RI) {
     ELFYAML::Relocation R;
     if (std::error_code EC = dumpRelocation(Shdr, &*RI, R))
@@ -299,11 +312,20 @@ ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
     return EC;
   // Get sh_info which is the signature.
   const Elf_Sym *symbol = Obj.getSymbol(Shdr->sh_info);
-  const Elf_Shdr *symtab = Obj.getSection(Shdr->sh_link);
+  ErrorOr<const Elf_Shdr *> Symtab = Obj.getSection(Shdr->sh_link);
+  if (std::error_code EC = Symtab.getError())
+    return EC;
+  ErrorOr<const Elf_Shdr *> StrTabSec = Obj.getSection((*Symtab)->sh_link);
+  if (std::error_code EC = StrTabSec.getError())
+    return EC;
+  ErrorOr<StringRef> StrTabOrErr = Obj.getStringTable(*StrTabSec);
+  if (std::error_code EC = StrTabOrErr.getError())
+    return EC;
+  StringRef StrTab = *StrTabOrErr;
   auto sectionContents = Obj.getSectionContents(Shdr);
   if (std::error_code ec = sectionContents.getError())
     return ec;
-  ErrorOr<StringRef> symbolName = Obj.getSymbolName(symtab, symbol);
+  ErrorOr<StringRef> symbolName = symbol->getName(StrTab);
   if (std::error_code EC = symbolName.getError())
     return EC;
   S->Info = *symbolName;
@@ -315,8 +337,10 @@ ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
     if (groupMembers[i] == llvm::ELF::GRP_COMDAT) {
       s.sectionNameOrType = "GRP_COMDAT";
     } else {
-      const Elf_Shdr *sHdr = Obj.getSection(groupMembers[i]);
-      ErrorOr<StringRef> sectionName = Obj.getSectionName(sHdr);
+      ErrorOr<const Elf_Shdr *> sHdr = Obj.getSection(groupMembers[i]);
+      if (std::error_code EC = sHdr.getError())
+        return EC;
+      ErrorOr<StringRef> sectionName = Obj.getSectionName(*sHdr);
       if (std::error_code ec = sectionName.getError())
         return ec;
       s.sectionNameOrType = *sectionName;
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index e4398f0..ffba7b1 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallString.h"
 #include "gtest/gtest.h"
+#include <array>
 #include <ostream>
 
 using namespace llvm;
@@ -215,6 +216,171 @@ TEST(APIntTest, i1) {
   }
 }
 
+TEST(APIntTest, compare) {
+  std::array<APInt, 5> testVals{{
+    APInt{16, 2},
+    APInt{16, 1},
+    APInt{16, 0},
+    APInt{16, (uint64_t)-1, true},
+    APInt{16, (uint64_t)-2, true},
+  }};
+
+  for (auto &arg1 : testVals)
+    for (auto &arg2 : testVals) {
+      auto uv1 = arg1.getZExtValue();
+      auto uv2 = arg2.getZExtValue();
+      auto sv1 = arg1.getSExtValue();
+      auto sv2 = arg2.getSExtValue();
+
+      EXPECT_EQ(uv1 <  uv2, arg1.ult(arg2));
+      EXPECT_EQ(uv1 <= uv2, arg1.ule(arg2));
+      EXPECT_EQ(uv1 >  uv2, arg1.ugt(arg2));
+      EXPECT_EQ(uv1 >= uv2, arg1.uge(arg2));
+
+      EXPECT_EQ(sv1 <  sv2, arg1.slt(arg2));
+      EXPECT_EQ(sv1 <= sv2, arg1.sle(arg2));
+      EXPECT_EQ(sv1 >  sv2, arg1.sgt(arg2));
+      EXPECT_EQ(sv1 >= sv2, arg1.sge(arg2));
+
+      EXPECT_EQ(uv1 <  uv2, arg1.ult(uv2));
+      EXPECT_EQ(uv1 <= uv2, arg1.ule(uv2));
+      EXPECT_EQ(uv1 >  uv2, arg1.ugt(uv2));
+      EXPECT_EQ(uv1 >= uv2, arg1.uge(uv2));
+
+      EXPECT_EQ(sv1 <  sv2, arg1.slt(sv2));
+      EXPECT_EQ(sv1 <= sv2, arg1.sle(sv2));
+      EXPECT_EQ(sv1 >  sv2, arg1.sgt(sv2));
+      EXPECT_EQ(sv1 >= sv2, arg1.sge(sv2));
+    }
+}
+
+TEST(APIntTest, compareWithRawIntegers) {
+  EXPECT_TRUE(!APInt(8, 1).uge(256));
+  EXPECT_TRUE(!APInt(8, 1).ugt(256));
+  EXPECT_TRUE( APInt(8, 1).ule(256));
+  EXPECT_TRUE( APInt(8, 1).ult(256));
+  EXPECT_TRUE(!APInt(8, 1).sge(256));
+  EXPECT_TRUE(!APInt(8, 1).sgt(256));
+  EXPECT_TRUE( APInt(8, 1).sle(256));
+  EXPECT_TRUE( APInt(8, 1).slt(256));
+  EXPECT_TRUE(!(APInt(8, 0) == 256));
+  EXPECT_TRUE(  APInt(8, 0) != 256);
+  EXPECT_TRUE(!(APInt(8, 1) == 256));
+  EXPECT_TRUE(  APInt(8, 1) != 256);
+
+  auto uint64max = UINT64_MAX;
+  auto int64max  = INT64_MAX;
+  auto int64min  = INT64_MIN;
+
+  auto u64 = APInt{128, uint64max};
+  auto s64 = APInt{128, static_cast<uint64_t>(int64max), true};
+  auto big = u64 + 1;
+
+  EXPECT_TRUE( u64.uge(uint64max));
+  EXPECT_TRUE(!u64.ugt(uint64max));
+  EXPECT_TRUE( u64.ule(uint64max));
+  EXPECT_TRUE(!u64.ult(uint64max));
+  EXPECT_TRUE( u64.sge(int64max));
+  EXPECT_TRUE( u64.sgt(int64max));
+  EXPECT_TRUE(!u64.sle(int64max));
+  EXPECT_TRUE(!u64.slt(int64max));
+  EXPECT_TRUE( u64.sge(int64min));
+  EXPECT_TRUE( u64.sgt(int64min));
+  EXPECT_TRUE(!u64.sle(int64min));
+  EXPECT_TRUE(!u64.slt(int64min));
+
+  EXPECT_TRUE(u64 == uint64max);
+  EXPECT_TRUE(u64 != int64max);
+  EXPECT_TRUE(u64 != int64min);
+
+  EXPECT_TRUE(!s64.uge(uint64max));
+  EXPECT_TRUE(!s64.ugt(uint64max));
+  EXPECT_TRUE( s64.ule(uint64max));
+  EXPECT_TRUE( s64.ult(uint64max));
+  EXPECT_TRUE( s64.sge(int64max));
+  EXPECT_TRUE(!s64.sgt(int64max));
+  EXPECT_TRUE( s64.sle(int64max));
+  EXPECT_TRUE(!s64.slt(int64max));
+  EXPECT_TRUE( s64.sge(int64min));
+  EXPECT_TRUE( s64.sgt(int64min));
+  EXPECT_TRUE(!s64.sle(int64min));
+  EXPECT_TRUE(!s64.slt(int64min));
+
+  EXPECT_TRUE(s64 != uint64max);
+  EXPECT_TRUE(s64 == int64max);
+  EXPECT_TRUE(s64 != int64min);
+
+  EXPECT_TRUE( big.uge(uint64max));
+  EXPECT_TRUE( big.ugt(uint64max));
+  EXPECT_TRUE(!big.ule(uint64max));
+  EXPECT_TRUE(!big.ult(uint64max));
+  EXPECT_TRUE( big.sge(int64max));
+  EXPECT_TRUE( big.sgt(int64max));
+  EXPECT_TRUE(!big.sle(int64max));
+  EXPECT_TRUE(!big.slt(int64max));
+  EXPECT_TRUE( big.sge(int64min));
+  EXPECT_TRUE( big.sgt(int64min));
+  EXPECT_TRUE(!big.sle(int64min));
+  EXPECT_TRUE(!big.slt(int64min));
+
+  EXPECT_TRUE(big != uint64max);
+  EXPECT_TRUE(big != int64max);
+  EXPECT_TRUE(big != int64min);
+}
+
+TEST(APIntTest, compareWithInt64Min) {
+  int64_t edge = INT64_MIN;
+  int64_t edgeP1 = edge + 1;
+  int64_t edgeM1 = INT64_MAX;
+  auto a = APInt{64, static_cast<uint64_t>(edge), true};
+
+  EXPECT_TRUE(!a.slt(edge));
+  EXPECT_TRUE( a.sle(edge));
+  EXPECT_TRUE(!a.sgt(edge));
+  EXPECT_TRUE( a.sge(edge));
+  EXPECT_TRUE( a.slt(edgeP1));
+  EXPECT_TRUE( a.sle(edgeP1));
+  EXPECT_TRUE(!a.sgt(edgeP1));
+  EXPECT_TRUE(!a.sge(edgeP1));
+  EXPECT_TRUE( a.slt(edgeM1));
+  EXPECT_TRUE( a.sle(edgeM1));
+  EXPECT_TRUE(!a.sgt(edgeM1));
+  EXPECT_TRUE(!a.sge(edgeM1));
+}
+
+TEST(APIntTest, compareWithHalfInt64Max) {
+  uint64_t edge = 0x4000000000000000;
+  uint64_t edgeP1 = edge + 1;
+  uint64_t edgeM1 = edge - 1;
+  auto a = APInt{64, edge};
+
+  EXPECT_TRUE(!a.ult(edge));
+  EXPECT_TRUE( a.ule(edge));
+  EXPECT_TRUE(!a.ugt(edge));
+  EXPECT_TRUE( a.uge(edge));
+  EXPECT_TRUE( a.ult(edgeP1));
+  EXPECT_TRUE( a.ule(edgeP1));
+  EXPECT_TRUE(!a.ugt(edgeP1));
+  EXPECT_TRUE(!a.uge(edgeP1));
+  EXPECT_TRUE(!a.ult(edgeM1));
+  EXPECT_TRUE(!a.ule(edgeM1));
+  EXPECT_TRUE( a.ugt(edgeM1));
+  EXPECT_TRUE( a.uge(edgeM1));
+
+  EXPECT_TRUE(!a.slt(edge));
+  EXPECT_TRUE( a.sle(edge));
+  EXPECT_TRUE(!a.sgt(edge));
+  EXPECT_TRUE( a.sge(edge));
+  EXPECT_TRUE( a.slt(edgeP1));
+  EXPECT_TRUE( a.sle(edgeP1));
+  EXPECT_TRUE(!a.sgt(edgeP1));
+  EXPECT_TRUE(!a.sge(edgeP1));
+  EXPECT_TRUE(!a.slt(edgeM1));
+  EXPECT_TRUE(!a.sle(edgeM1));
+  EXPECT_TRUE( a.sgt(edgeM1));
+  EXPECT_TRUE( a.sge(edgeM1));
+}
+
 
 // Tests different div/rem varaints using scheme (a * b + c) / a
 void testDiv(APInt a, APInt b, APInt c) {
diff --git a/unittests/ADT/APSIntTest.cpp b/unittests/ADT/APSIntTest.cpp
index 5e4e874..a9b3071 100644
--- a/unittests/ADT/APSIntTest.cpp
+++ b/unittests/ADT/APSIntTest.cpp
@@ -143,4 +143,21 @@ TEST(APSIntTest, compareValues) {
   EXPECT_TRUE(APSInt::compareValues(U(8), S(-7).trunc(32)) > 0);
 }
 
+TEST(APSIntTest, FromString) {
+  EXPECT_EQ(APSInt("1").getExtValue(), 1);
+  EXPECT_EQ(APSInt("-1").getExtValue(), -1);
+  EXPECT_EQ(APSInt("0").getExtValue(), 0);
+  EXPECT_EQ(APSInt("56789").getExtValue(), 56789);
+  EXPECT_EQ(APSInt("-1234").getExtValue(), -1234);
 }
+
+#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
+
+TEST(APSIntTest, StringDeath) {
+  EXPECT_DEATH(APSInt(""), "Invalid string length");
+  EXPECT_DEATH(APSInt("1a"), "Invalid character in digit string");
+}
+
+#endif
+
+} // end anonymous namespace
diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp
index 9780777..2c6fe35 100644
--- a/unittests/ADT/DenseMapTest.cpp
+++ b/unittests/ADT/DenseMapTest.cpp
@@ -323,6 +323,31 @@ TYPED_TEST(DenseMapTest, ConstIteratorTest) {
   EXPECT_TRUE(cit == cit2);
 }
 
+// Make sure DenseMap works with StringRef keys.
+TEST(DenseMapCustomTest, StringRefTest) {
+  DenseMap<StringRef, int> M;
+
+  M["a"] = 1;
+  M["b"] = 2;
+  M["c"] = 3;
+
+  EXPECT_EQ(3u, M.size());
+  EXPECT_EQ(1, M.lookup("a"));
+  EXPECT_EQ(2, M.lookup("b"));
+  EXPECT_EQ(3, M.lookup("c"));
+
+  EXPECT_EQ(0, M.lookup("q"));
+
+  // Test the empty string, spelled various ways.
+  EXPECT_EQ(0, M.lookup(""));
+  EXPECT_EQ(0, M.lookup(StringRef()));
+  EXPECT_EQ(0, M.lookup(StringRef("a", 0)));
+  M[""] = 42;
+  EXPECT_EQ(42, M.lookup(""));
+  EXPECT_EQ(42, M.lookup(StringRef()));
+  EXPECT_EQ(42, M.lookup(StringRef("a", 0)));
+}
+
 // Key traits that allows lookup with either an unsigned or char* key;
 // In the latter case, "a" == 0, "b" == 1 and so on.
 struct TestDenseMapInfo {
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 23a9128..d507684 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -182,6 +182,18 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::CloudABI, T.getOS());
   EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
 
+  T = Triple("wasm32-unknown-unknown");
+  EXPECT_EQ(Triple::wasm32, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+  T = Triple("wasm64-unknown-unknown");
+  EXPECT_EQ(Triple::wasm64, T.getArch());
+  EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+  EXPECT_EQ(Triple::UnknownOS, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
   T = Triple("huh");
   EXPECT_EQ(Triple::UnknownArch, T.getArch());
 }
@@ -439,6 +451,16 @@ TEST(TripleTest, BitWidthPredicates) {
   EXPECT_FALSE(T.isArch16Bit());
   EXPECT_FALSE(T.isArch32Bit());
   EXPECT_TRUE(T.isArch64Bit());
+
+  T.setArch(Triple::wasm32);
+  EXPECT_FALSE(T.isArch16Bit());
+  EXPECT_TRUE(T.isArch32Bit());
+  EXPECT_FALSE(T.isArch64Bit());
+
+  T.setArch(Triple::wasm64);
+  EXPECT_FALSE(T.isArch16Bit());
+  EXPECT_FALSE(T.isArch32Bit());
+  EXPECT_TRUE(T.isArch64Bit());
 }
 
 TEST(TripleTest, BitWidthArchVariants) {
@@ -521,6 +543,14 @@ TEST(TripleTest, BitWidthArchVariants) {
   T.setArch(Triple::spir64);
   EXPECT_EQ(Triple::spir, T.get32BitArchVariant().getArch());
   EXPECT_EQ(Triple::spir64, T.get64BitArchVariant().getArch());
+
+  T.setArch(Triple::wasm32);
+  EXPECT_EQ(Triple::wasm32, T.get32BitArchVariant().getArch());
+  EXPECT_EQ(Triple::wasm64, T.get64BitArchVariant().getArch());
+
+  T.setArch(Triple::wasm64);
+  EXPECT_EQ(Triple::wasm32, T.get32BitArchVariant().getArch());
+  EXPECT_EQ(Triple::wasm64, T.get64BitArchVariant().getArch());
 }
 
 TEST(TripleTest, getOSVersion) {
diff --git a/unittests/AsmParser/AsmParserTest.cpp b/unittests/AsmParser/AsmParserTest.cpp
index 8847b18..9c2081f 100644
--- a/unittests/AsmParser/AsmParserTest.cpp
+++ b/unittests/AsmParser/AsmParserTest.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/AsmParser/Parser.h"
+#include "llvm/AsmParser/SlotMapping.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/SourceMgr.h"
@@ -44,4 +45,23 @@ TEST(AsmParserTest, NonNullTerminatedInput) {
 #endif
 #endif
 
+TEST(AsmParserTest, SlotMappingTest) {
+  LLVMContext &Ctx = getGlobalContext();
+  StringRef Source = "@0 = global i32 0\n !0 = !{}\n !42 = !{i32 42}";
+  SMDiagnostic Error;
+  SlotMapping Mapping;
+  auto Mod = parseAssemblyString(Source, Error, Ctx, &Mapping);
+
+  EXPECT_TRUE(Mod != nullptr);
+  EXPECT_TRUE(Error.getMessage().empty());
+
+  ASSERT_EQ(Mapping.GlobalValues.size(), 1u);
+  EXPECT_TRUE(isa<GlobalVariable>(Mapping.GlobalValues[0]));
+
+  EXPECT_EQ(Mapping.MetadataNodes.size(), 2u);
+  EXPECT_EQ(Mapping.MetadataNodes.count(0), 1u);
+  EXPECT_EQ(Mapping.MetadataNodes.count(42), 1u);
+  EXPECT_EQ(Mapping.MetadataNodes.count(1), 0u);
+}
+
 } // end anonymous namespace
diff --git a/unittests/CodeGen/DIEHashTest.cpp b/unittests/CodeGen/DIEHashTest.cpp
index 8e78f0c..e3a9e56 100644
--- a/unittests/CodeGen/DIEHashTest.cpp
+++ b/unittests/CodeGen/DIEHashTest.cpp
@@ -22,6 +22,10 @@ namespace {
 
 // Test fixture
 class DIEHashTest : public testing::Test {
+public:
+  BumpPtrAllocator Alloc;
+
+private:
   StringMap<DwarfStringPoolEntry> Pool;
 
 public:
@@ -34,22 +38,22 @@ public:
 
 TEST_F(DIEHashTest, Data1) {
   DIEHash Hash;
-  DIE Die(dwarf::DW_TAG_base_type);
+  DIE &Die = *DIE::get(Alloc, dwarf::DW_TAG_base_type);
   DIEInteger Size(4);
-  Die.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Size);
+  Die.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Size);
   uint64_t MD5Res = Hash.computeTypeSignature(Die);
   ASSERT_EQ(0x1AFE116E83701108ULL, MD5Res);
 }
 
 // struct {};
 TEST_F(DIEHashTest, TrivialType) {
-  DIE Unnamed(dwarf::DW_TAG_structure_type);
+  DIE &Unnamed = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
-  Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
   // Line and file number are ignored.
-  Unnamed.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
-  Unnamed.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
   uint64_t MD5Res = DIEHash().computeTypeSignature(Unnamed);
 
   // The exact same hash GCC produces for this DIE.
@@ -58,11 +62,11 @@ TEST_F(DIEHashTest, TrivialType) {
 
 // struct foo { };
 TEST_F(DIEHashTest, NamedType) {
-  DIE Foo(dwarf::DW_TAG_structure_type);
+  DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
   DIEString FooStr = getString("foo");
-  Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
-  Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
   uint64_t MD5Res = DIEHash().computeTypeSignature(Foo);
 
@@ -72,20 +76,21 @@ TEST_F(DIEHashTest, NamedType) {
 
 // namespace space { struct foo { }; }
 TEST_F(DIEHashTest, NamespacedType) {
-  DIE CU(dwarf::DW_TAG_compile_unit);
+  DIE &CU = *DIE::get(Alloc, dwarf::DW_TAG_compile_unit);
 
-  auto Space = make_unique<DIE>(dwarf::DW_TAG_namespace);
+  auto Space = DIE::get(Alloc, dwarf::DW_TAG_namespace);
   DIEInteger One(1);
   DIEString SpaceStr = getString("space");
-  Space->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, SpaceStr);
+  Space->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, SpaceStr);
   // DW_AT_declaration is ignored.
-  Space->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
+  Space->addValue(Alloc, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present,
+                  One);
   // sibling?
 
-  auto Foo = make_unique<DIE>(dwarf::DW_TAG_structure_type);
+  auto Foo = DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEString FooStr = getString("foo");
-  Foo->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
-  Foo->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  Foo->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo->addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
   DIE &N = *Foo;
   Space->addChild(std::move(Foo));
@@ -99,26 +104,26 @@ TEST_F(DIEHashTest, NamespacedType) {
 
 // struct { int member; };
 TEST_F(DIEHashTest, TypeWithMember) {
-  DIE Unnamed(dwarf::DW_TAG_structure_type);
+  DIE &Unnamed = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger Four(4);
-  Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
 
-  DIE Int(dwarf::DW_TAG_base_type);
+  DIE &Int = *DIE::get(Alloc, dwarf::DW_TAG_base_type);
   DIEString IntStr = getString("int");
-  Int.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, IntStr);
-  Int.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
+  Int.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, IntStr);
+  Int.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
   DIEInteger Five(5);
-  Int.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
+  Int.addValue(Alloc, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
 
   DIEEntry IntRef(Int);
 
-  auto Member = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Member = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString MemberStr = getString("member");
-  Member->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemberStr);
+  Member->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemberStr);
   DIEInteger Zero(0);
-  Member->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
-                   Zero);
-  Member->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
+  Member->addValue(Alloc, dwarf::DW_AT_data_member_location,
+                   dwarf::DW_FORM_data1, Zero);
+  Member->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
 
   Unnamed.addChild(std::move(Member));
 
@@ -129,36 +134,36 @@ TEST_F(DIEHashTest, TypeWithMember) {
 
 // struct foo { int mem1, mem2; };
 TEST_F(DIEHashTest, ReusedType) {
-  DIE Unnamed(dwarf::DW_TAG_structure_type);
+  DIE &Unnamed = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger Eight(8);
-  Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
 
   DIEInteger Four(4);
-  DIE Int(dwarf::DW_TAG_base_type);
+  DIE &Int = *DIE::get(Alloc, dwarf::DW_TAG_base_type);
   DIEString IntStr = getString("int");
-  Int.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, IntStr);
-  Int.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
+  Int.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, IntStr);
+  Int.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
   DIEInteger Five(5);
-  Int.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
+  Int.addValue(Alloc, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
 
   DIEEntry IntRef(Int);
 
-  auto Mem1 = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Mem1 = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString Mem1Str = getString("mem1");
-  Mem1->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, Mem1Str);
+  Mem1->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, Mem1Str);
   DIEInteger Zero(0);
-  Mem1->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
+  Mem1->addValue(Alloc, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
                  Zero);
-  Mem1->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
+  Mem1->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
 
   Unnamed.addChild(std::move(Mem1));
 
-  auto Mem2 = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Mem2 = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString Mem2Str = getString("mem2");
-  Mem2->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, Mem2Str);
-  Mem2->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
+  Mem2->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, Mem2Str);
+  Mem2->addValue(Alloc, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
                  Four);
-  Mem2->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
+  Mem2->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntRef);
 
   Unnamed.addChild(std::move(Mem2));
 
@@ -169,17 +174,17 @@ TEST_F(DIEHashTest, ReusedType) {
 
 // struct foo { static foo f; };
 TEST_F(DIEHashTest, RecursiveType) {
-  DIE Foo(dwarf::DW_TAG_structure_type);
+  DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
-  Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
   DIEString FooStr = getString("foo");
-  Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-  auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString MemStr = getString("mem");
-  Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+  Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
   DIEEntry FooRef(Foo);
-  Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRef);
+  Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRef);
   // DW_AT_external and DW_AT_declaration are ignored anyway, so skip them.
 
   Foo.addChild(std::move(Mem));
@@ -191,25 +196,26 @@ TEST_F(DIEHashTest, RecursiveType) {
 
 // struct foo { foo *mem; };
 TEST_F(DIEHashTest, Pointer) {
-  DIE Foo(dwarf::DW_TAG_structure_type);
+  DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger Eight(8);
-  Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
   DIEString FooStr = getString("foo");
-  Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-  auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString MemStr = getString("mem");
-  Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+  Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
   DIEInteger Zero(0);
-  Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
+  Mem->addValue(Alloc, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
+                Zero);
 
-  DIE FooPtr(dwarf::DW_TAG_pointer_type);
-  FooPtr.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  DIE &FooPtr = *DIE::get(Alloc, dwarf::DW_TAG_pointer_type);
+  FooPtr.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
   DIEEntry FooRef(Foo);
-  FooPtr.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRef);
+  FooPtr.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRef);
 
   DIEEntry FooPtrRef(FooPtr);
-  Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooPtrRef);
+  Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooPtrRef);
 
   Foo.addChild(std::move(Mem));
 
@@ -220,29 +226,31 @@ TEST_F(DIEHashTest, Pointer) {
 
 // struct foo { foo &mem; };
 TEST_F(DIEHashTest, Reference) {
-  DIE Foo(dwarf::DW_TAG_structure_type);
+  DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger Eight(8);
-  Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
   DIEString FooStr = getString("foo");
-  Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-  auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString MemStr = getString("mem");
-  Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+  Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
   DIEInteger Zero(0);
-  Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
+  Mem->addValue(Alloc, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
+                Zero);
 
-  DIE FooRef(dwarf::DW_TAG_reference_type);
-  FooRef.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  DIE &FooRef = *DIE::get(Alloc, dwarf::DW_TAG_reference_type);
+  FooRef.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
   DIEEntry FooEntry(Foo);
-  FooRef.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
+  FooRef.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
 
-  DIE FooRefConst(dwarf::DW_TAG_const_type);
+  DIE &FooRefConst = *DIE::get(Alloc, dwarf::DW_TAG_const_type);
   DIEEntry FooRefRef(FooRef);
-  FooRefConst.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefRef);
+  FooRefConst.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                       FooRefRef);
 
   DIEEntry FooRefConstRef(FooRefConst);
-  Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefConstRef);
+  Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefConstRef);
 
   Foo.addChild(std::move(Mem));
 
@@ -253,29 +261,31 @@ TEST_F(DIEHashTest, Reference) {
 
 // struct foo { foo &&mem; };
 TEST_F(DIEHashTest, RValueReference) {
-  DIE Foo(dwarf::DW_TAG_structure_type);
+  DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger Eight(8);
-  Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
   DIEString FooStr = getString("foo");
-  Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-  auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString MemStr = getString("mem");
-  Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+  Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
   DIEInteger Zero(0);
-  Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
+  Mem->addValue(Alloc, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
+                Zero);
 
-  DIE FooRef(dwarf::DW_TAG_rvalue_reference_type);
-  FooRef.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  DIE &FooRef = *DIE::get(Alloc, dwarf::DW_TAG_rvalue_reference_type);
+  FooRef.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
   DIEEntry FooEntry(Foo);
-  FooRef.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
+  FooRef.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
 
-  DIE FooRefConst(dwarf::DW_TAG_const_type);
+  DIE &FooRefConst = *DIE::get(Alloc, dwarf::DW_TAG_const_type);
   DIEEntry FooRefRef(FooRef);
-  FooRefConst.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefRef);
+  FooRefConst.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                       FooRefRef);
 
   DIEEntry FooRefConstRef(FooRefConst);
-  Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefConstRef);
+  Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooRefConstRef);
 
   Foo.addChild(std::move(Mem));
 
@@ -286,26 +296,27 @@ TEST_F(DIEHashTest, RValueReference) {
 
 // struct foo { foo foo::*mem; };
 TEST_F(DIEHashTest, PtrToMember) {
-  DIE Foo(dwarf::DW_TAG_structure_type);
+  DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger Eight(8);
-  Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
   DIEString FooStr = getString("foo");
-  Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-  auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString MemStr = getString("mem");
-  Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+  Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
   DIEInteger Zero(0);
-  Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
+  Mem->addValue(Alloc, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
+                Zero);
 
-  DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
+  DIE &PtrToFooMem = *DIE::get(Alloc, dwarf::DW_TAG_ptr_to_member_type);
   DIEEntry FooEntry(Foo);
-  PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
-  PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+  PtrToFooMem.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FooEntry);
+  PtrToFooMem.addValue(Alloc, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
                        FooEntry);
 
   DIEEntry PtrToFooMemRef(PtrToFooMem);
-  Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
+  Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
 
   Foo.addChild(std::move(Mem));
 
@@ -328,28 +339,31 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMatch) {
   DIEString MemStr = getString("mem");
   uint64_t MD5ResDecl;
   {
-    DIE Bar(dwarf::DW_TAG_structure_type);
-    Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
-    Bar.addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
+    DIE &Bar = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Bar.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+    Bar.addValue(Alloc, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present,
+                 One);
 
-    DIE Foo(dwarf::DW_TAG_structure_type);
-    Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
-    Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+    DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+    Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-    auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
-    Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
-    Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
-                  Zero);
+    auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
+    Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+    Mem->addValue(Alloc, dwarf::DW_AT_data_member_location,
+                  dwarf::DW_FORM_data1, Zero);
 
-    DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
+    DIE &PtrToFooMem = *DIE::get(Alloc, dwarf::DW_TAG_ptr_to_member_type);
     DIEEntry BarEntry(Bar);
-    PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                         BarEntry);
     DIEEntry FooEntry(Foo);
-    PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
-                         FooEntry);
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_containing_type,
+                         dwarf::DW_FORM_ref4, FooEntry);
 
     DIEEntry PtrToFooMemRef(PtrToFooMem);
-    Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
+    Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                  PtrToFooMemRef);
 
     Foo.addChild(std::move(Mem));
 
@@ -357,28 +371,30 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMatch) {
   }
   uint64_t MD5ResDef;
   {
-    DIE Bar(dwarf::DW_TAG_structure_type);
-    Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
-    Bar.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+    DIE &Bar = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Bar.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+    Bar.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
-    DIE Foo(dwarf::DW_TAG_structure_type);
-    Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
-    Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+    DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+    Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-    auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
-    Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
-    Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
-                  Zero);
+    auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
+    Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+    Mem->addValue(Alloc, dwarf::DW_AT_data_member_location,
+                  dwarf::DW_FORM_data1, Zero);
 
-    DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
+    DIE &PtrToFooMem = *DIE::get(Alloc, dwarf::DW_TAG_ptr_to_member_type);
     DIEEntry BarEntry(Bar);
-    PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                         BarEntry);
     DIEEntry FooEntry(Foo);
-    PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
-                         FooEntry);
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_containing_type,
+                         dwarf::DW_FORM_ref4, FooEntry);
 
     DIEEntry PtrToFooMemRef(PtrToFooMem);
-    Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
+    Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                  PtrToFooMemRef);
 
     Foo.addChild(std::move(Mem));
 
@@ -401,27 +417,30 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMisMatch) {
   DIEString MemStr = getString("mem");
   uint64_t MD5ResDecl;
   {
-    DIE Bar(dwarf::DW_TAG_structure_type);
-    Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
-    Bar.addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
+    DIE &Bar = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Bar.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+    Bar.addValue(Alloc, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present,
+                 One);
 
-    DIE Foo(dwarf::DW_TAG_structure_type);
-    Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
-    Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+    DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+    Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-    auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
-    Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
-    Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
-                  Zero);
+    auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
+    Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+    Mem->addValue(Alloc, dwarf::DW_AT_data_member_location,
+                  dwarf::DW_FORM_data1, Zero);
 
-    DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
+    DIE &PtrToFooMem = *DIE::get(Alloc, dwarf::DW_TAG_ptr_to_member_type);
     DIEEntry BarEntry(Bar);
-    PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
-    PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
                          BarEntry);
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_containing_type,
+                         dwarf::DW_FORM_ref4, BarEntry);
 
     DIEEntry PtrToFooMemRef(PtrToFooMem);
-    Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
+    Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                  PtrToFooMemRef);
 
     Foo.addChild(std::move(Mem));
 
@@ -429,27 +448,29 @@ TEST_F(DIEHashTest, PtrToMemberDeclDefMisMatch) {
   }
   uint64_t MD5ResDef;
   {
-    DIE Bar(dwarf::DW_TAG_structure_type);
-    Bar.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
-    Bar.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+    DIE &Bar = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Bar.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, BarStr);
+    Bar.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
-    DIE Foo(dwarf::DW_TAG_structure_type);
-    Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
-    Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+    DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+    Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+    Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-    auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
-    Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
-    Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
-                  Zero);
+    auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
+    Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+    Mem->addValue(Alloc, dwarf::DW_AT_data_member_location,
+                  dwarf::DW_FORM_data1, Zero);
 
-    DIE PtrToFooMem(dwarf::DW_TAG_ptr_to_member_type);
+    DIE &PtrToFooMem = *DIE::get(Alloc, dwarf::DW_TAG_ptr_to_member_type);
     DIEEntry BarEntry(Bar);
-    PtrToFooMem.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, BarEntry);
-    PtrToFooMem.addValue(dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
                          BarEntry);
+    PtrToFooMem.addValue(Alloc, dwarf::DW_AT_containing_type,
+                         dwarf::DW_FORM_ref4, BarEntry);
 
     DIEEntry PtrToFooMemRef(PtrToFooMem);
-    Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PtrToFooMemRef);
+    Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                  PtrToFooMemRef);
 
     Foo.addChild(std::move(Mem));
 
@@ -472,24 +493,27 @@ TEST_F(DIEHashTest, RefUnnamedType) {
   DIEString FooStr = getString("foo");
   DIEString MemStr = getString("mem");
 
-  DIE Unnamed(dwarf::DW_TAG_structure_type);
-  Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  DIE &Unnamed = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
-  DIE Foo(dwarf::DW_TAG_structure_type);
-  Foo.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
-  Foo.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  DIE &Foo = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
+  Foo.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  Foo.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
 
-  auto Mem = make_unique<DIE>(dwarf::DW_TAG_member);
-  Mem->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
-  Mem->addValue(dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1, Zero);
+  auto Mem = DIE::get(Alloc, dwarf::DW_TAG_member);
+  Mem->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, MemStr);
+  Mem->addValue(Alloc, dwarf::DW_AT_data_member_location, dwarf::DW_FORM_data1,
+                Zero);
 
-  DIE UnnamedPtr(dwarf::DW_TAG_pointer_type);
-  UnnamedPtr.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Eight);
+  DIE &UnnamedPtr = *DIE::get(Alloc, dwarf::DW_TAG_pointer_type);
+  UnnamedPtr.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1,
+                      Eight);
   DIEEntry UnnamedRef(Unnamed);
-  UnnamedPtr.addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, UnnamedRef);
+  UnnamedPtr.addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4,
+                      UnnamedRef);
 
   DIEEntry UnnamedPtrRef(UnnamedPtr);
-  Mem->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, UnnamedPtrRef);
+  Mem->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, UnnamedPtrRef);
 
   Foo.addChild(std::move(Mem));
 
@@ -500,14 +524,14 @@ TEST_F(DIEHashTest, RefUnnamedType) {
 
 // struct { struct foo { }; };
 TEST_F(DIEHashTest, NestedType) {
-  DIE Unnamed(dwarf::DW_TAG_structure_type);
+  DIE &Unnamed = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
-  Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
-  auto Foo = make_unique<DIE>(dwarf::DW_TAG_structure_type);
+  auto Foo = DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEString FooStr = getString("foo");
-  Foo->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
-  Foo->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  Foo->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FooStr);
+  Foo->addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
   Unnamed.addChild(std::move(Foo));
 
@@ -519,13 +543,13 @@ TEST_F(DIEHashTest, NestedType) {
 
 // struct { static void func(); };
 TEST_F(DIEHashTest, MemberFunc) {
-  DIE Unnamed(dwarf::DW_TAG_structure_type);
+  DIE &Unnamed = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
-  Unnamed.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  Unnamed.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
 
-  auto Func = make_unique<DIE>(dwarf::DW_TAG_subprogram);
+  auto Func = DIE::get(Alloc, dwarf::DW_TAG_subprogram);
   DIEString FuncStr = getString("func");
-  Func->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FuncStr);
+  Func->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FuncStr);
 
   Unnamed.addChild(std::move(Func));
 
@@ -539,24 +563,27 @@ TEST_F(DIEHashTest, MemberFunc) {
 //   static void func();
 // };
 TEST_F(DIEHashTest, MemberFuncFlag) {
-  DIE A(dwarf::DW_TAG_structure_type);
+  DIE &A = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
   DIEString AStr = getString("A");
-  A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
-  A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
-  A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
-  A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
+  A.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
 
-  auto Func = make_unique<DIE>(dwarf::DW_TAG_subprogram);
+  auto Func = DIE::get(Alloc, dwarf::DW_TAG_subprogram);
   DIEString FuncStr = getString("func");
   DIEString FuncLinkage = getString("_ZN1A4funcEv");
   DIEInteger Two(2);
-  Func->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
-  Func->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FuncStr);
-  Func->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
-  Func->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
-  Func->addValue(dwarf::DW_AT_linkage_name, dwarf::DW_FORM_strp, FuncLinkage);
-  Func->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
+  Func->addValue(Alloc, dwarf::DW_AT_external, dwarf::DW_FORM_flag_present,
+                 One);
+  Func->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FuncStr);
+  Func->addValue(Alloc, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+  Func->addValue(Alloc, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
+  Func->addValue(Alloc, dwarf::DW_AT_linkage_name, dwarf::DW_FORM_strp,
+                 FuncLinkage);
+  Func->addValue(Alloc, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present,
+                 One);
 
   A.addChild(std::move(Func));
 
@@ -572,38 +599,39 @@ TEST_F(DIEHashTest, MemberFuncFlag) {
 // };
 // A a;
 TEST_F(DIEHashTest, MemberSdata) {
-  DIE A(dwarf::DW_TAG_structure_type);
+  DIE &A = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
   DIEString AStr = getString("A");
-  A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
-  A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
-  A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
-  A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
+  A.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
 
   DIEInteger Four(4);
   DIEInteger Five(5);
   DIEString FStr = getString("int");
-  DIE IntTyDIE(dwarf::DW_TAG_base_type);
-  IntTyDIE.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
-  IntTyDIE.addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
-  IntTyDIE.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FStr);
+  DIE &IntTyDIE = *DIE::get(Alloc, dwarf::DW_TAG_base_type);
+  IntTyDIE.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
+  IntTyDIE.addValue(Alloc, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Five);
+  IntTyDIE.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FStr);
 
   DIEEntry IntTy(IntTyDIE);
-  auto PITyDIE = make_unique<DIE>(dwarf::DW_TAG_const_type);
-  PITyDIE->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntTy);
+  auto PITyDIE = DIE::get(Alloc, dwarf::DW_TAG_const_type);
+  PITyDIE->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IntTy);
 
   DIEEntry PITy(*PITyDIE);
-  auto PI = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto PI = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString PIStr = getString("PI");
   DIEInteger Two(2);
   DIEInteger NegThree(-3);
-  PI->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, PIStr);
-  PI->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
-  PI->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
-  PI->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PITy);
-  PI->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
-  PI->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
-  PI->addValue(dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, NegThree);
+  PI->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, PIStr);
+  PI->addValue(Alloc, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+  PI->addValue(Alloc, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
+  PI->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PITy);
+  PI->addValue(Alloc, dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
+  PI->addValue(Alloc, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present,
+               One);
+  PI->addValue(Alloc, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, NegThree);
 
   A.addChild(std::move(PI));
 
@@ -617,35 +645,37 @@ TEST_F(DIEHashTest, MemberSdata) {
 // };
 // A a;
 TEST_F(DIEHashTest, MemberBlock) {
-  DIE A(dwarf::DW_TAG_structure_type);
+  DIE &A = *DIE::get(Alloc, dwarf::DW_TAG_structure_type);
   DIEInteger One(1);
   DIEString AStr = getString("A");
-  A.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
-  A.addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
-  A.addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
-  A.addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, AStr);
+  A.addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+  A.addValue(Alloc, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, One);
 
   DIEInteger Four(4);
   DIEString FStr = getString("float");
-  auto FloatTyDIE = make_unique<DIE>(dwarf::DW_TAG_base_type);
-  FloatTyDIE->addValue(dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1, Four);
-  FloatTyDIE->addValue(dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Four);
-  FloatTyDIE->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, FStr);
-
+  auto FloatTyDIE = DIE::get(Alloc, dwarf::DW_TAG_base_type);
+  FloatTyDIE->addValue(Alloc, dwarf::DW_AT_byte_size, dwarf::DW_FORM_data1,
+                       Four);
+  FloatTyDIE->addValue(Alloc, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+                       Four);
+  FloatTyDIE->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, FStr);
   DIEEntry FloatTy(*FloatTyDIE);
-  auto PITyDIE = make_unique<DIE>(dwarf::DW_TAG_const_type);
-  PITyDIE->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FloatTy);
+  auto PITyDIE = DIE::get(Alloc, dwarf::DW_TAG_const_type);
+  PITyDIE->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, FloatTy);
 
   DIEEntry PITy(*PITyDIE);
-  auto PI = make_unique<DIE>(dwarf::DW_TAG_member);
+  auto PI = DIE::get(Alloc, dwarf::DW_TAG_member);
   DIEString PIStr = getString("PI");
   DIEInteger Two(2);
-  PI->addValue(dwarf::DW_AT_name, dwarf::DW_FORM_strp, PIStr);
-  PI->addValue(dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
-  PI->addValue(dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
-  PI->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PITy);
-  PI->addValue(dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
-  PI->addValue(dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present, One);
+  PI->addValue(Alloc, dwarf::DW_AT_name, dwarf::DW_FORM_strp, PIStr);
+  PI->addValue(Alloc, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data1, One);
+  PI->addValue(Alloc, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data1, Two);
+  PI->addValue(Alloc, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, PITy);
+  PI->addValue(Alloc, dwarf::DW_AT_external, dwarf::DW_FORM_flag_present, One);
+  PI->addValue(Alloc, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag_present,
+               One);
 
   DIEBlock PIBlock;
   DIEInteger Blk1(0xc3);
@@ -653,12 +683,13 @@ TEST_F(DIEHashTest, MemberBlock) {
   DIEInteger Blk3(0x48);
   DIEInteger Blk4(0x40);
 
-  PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk1);
-  PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk2);
-  PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk3);
-  PIBlock.addValue((dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk4);
+  PIBlock.addValue(Alloc, (dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk1);
+  PIBlock.addValue(Alloc, (dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk2);
+  PIBlock.addValue(Alloc, (dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk3);
+  PIBlock.addValue(Alloc, (dwarf::Attribute)0, dwarf::DW_FORM_data1, Blk4);
 
-  PI->addValue(dwarf::DW_AT_const_value, dwarf::DW_FORM_block1, &PIBlock);
+  PI->addValue(Alloc, dwarf::DW_AT_const_value, dwarf::DW_FORM_block1,
+               &PIBlock);
 
   A.addChild(std::move(PI));
 
diff --git a/unittests/ExecutionEngine/Orc/CMakeLists.txt b/unittests/ExecutionEngine/Orc/CMakeLists.txt
index 67b215e..30bd19f 100644
--- a/unittests/ExecutionEngine/Orc/CMakeLists.txt
+++ b/unittests/ExecutionEngine/Orc/CMakeLists.txt
@@ -7,5 +7,6 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_unittest(OrcJITTests
   IndirectionUtilsTest.cpp
   LazyEmittingLayerTest.cpp
+  ObjectTransformLayerTest.cpp
   OrcTestCommon.cpp
   )
diff --git a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
new file mode 100644
index 0000000..41b2307
--- /dev/null
+++ b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
@@ -0,0 +1,302 @@
+//===- ObjectTransformLayerTest.cpp - Unit tests for ObjectTransformLayer -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "gtest/gtest.h"
+
+using namespace llvm::orc;
+
+namespace {
+
+// Stand-in for RuntimeDyld::MemoryManager
+typedef int MockMemoryManager;
+
+// Stand-in for RuntimeDyld::SymbolResolver
+typedef int MockSymbolResolver;
+
+// stand-in for object::ObjectFile
+typedef int MockObjectFile;
+
+// stand-in for llvm::MemoryBuffer set
+typedef int MockMemoryBufferSet;
+
+// Mock transform that operates on unique pointers to object files, and
+// allocates new object files rather than mutating the given ones.
+struct AllocatingTransform {
+  std::unique_ptr<MockObjectFile>
+  operator()(std::unique_ptr<MockObjectFile> Obj) const {
+    return llvm::make_unique<MockObjectFile>(*Obj + 1);
+  }
+};
+
+// Mock base layer for verifying behavior of transform layer.
+// Each method "T foo(args)" is accompanied by two auxiliary methods:
+//  - "void expectFoo(args)", to be called before calling foo on the transform
+//      layer; saves values of args, which mock layer foo then verifies against.
+// - "void verifyFoo(T)", to be called after foo, which verifies that the
+//      transform layer called the base layer and forwarded any return value.
+class MockBaseLayer {
+public:
+  typedef int ObjSetHandleT;
+
+  MockBaseLayer() : MockSymbol(nullptr) { resetExpectations(); }
+
+  template <typename ObjSetT, typename MemoryManagerPtrT,
+            typename SymbolResolverPtrT>
+  ObjSetHandleT addObjectSet(ObjSetT &Objects, MemoryManagerPtrT MemMgr,
+                             SymbolResolverPtrT Resolver) {
+    EXPECT_EQ(MockManager, *MemMgr) << "MM should pass through";
+    EXPECT_EQ(MockResolver, *Resolver) << "Resolver should pass through";
+    size_t I = 0;
+    for (auto &ObjPtr : Objects) {
+      EXPECT_EQ(MockObjects[I++] + 1, *ObjPtr) << "Transform should be applied";
+    }
+    EXPECT_EQ(MockObjects.size(), I) << "Number of objects should match";
+    LastCalled = "addObjectSet";
+    MockObjSetHandle = 111;
+    return MockObjSetHandle;
+  }
+  template <typename ObjSetT>
+  void expectAddObjectSet(ObjSetT &Objects, MockMemoryManager *MemMgr,
+                          MockSymbolResolver *Resolver) {
+    MockManager = *MemMgr;
+    MockResolver = *Resolver;
+    for (auto &ObjPtr : Objects) {
+      MockObjects.push_back(*ObjPtr);
+    }
+  }
+  void verifyAddObjectSet(ObjSetHandleT Returned) {
+    EXPECT_EQ("addObjectSet", LastCalled);
+    EXPECT_EQ(MockObjSetHandle, Returned) << "Return should pass through";
+    resetExpectations();
+  }
+
+  void removeObjectSet(ObjSetHandleT H) {
+    EXPECT_EQ(MockObjSetHandle, H);
+    LastCalled = "removeObjectSet";
+  }
+  void expectRemoveObjectSet(ObjSetHandleT H) { MockObjSetHandle = H; }
+  void verifyRemoveObjectSet() {
+    EXPECT_EQ("removeObjectSet", LastCalled);
+    resetExpectations();
+  }
+
+  JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+    EXPECT_EQ(MockName, Name) << "Name should pass through";
+    EXPECT_EQ(MockBool, ExportedSymbolsOnly) << "Flag should pass through";
+    LastCalled = "findSymbol";
+    MockSymbol = JITSymbol(122, llvm::JITSymbolFlags::None);
+    return MockSymbol;
+  }
+  void expectFindSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+    MockName = Name;
+    MockBool = ExportedSymbolsOnly;
+  }
+  void verifyFindSymbol(llvm::orc::JITSymbol Returned) {
+    EXPECT_EQ("findSymbol", LastCalled);
+    EXPECT_EQ(MockSymbol.getAddress(), Returned.getAddress())
+        << "Return should pass through";
+    resetExpectations();
+  }
+
+  JITSymbol findSymbolIn(ObjSetHandleT H, const std::string &Name,
+                         bool ExportedSymbolsOnly) {
+    EXPECT_EQ(MockObjSetHandle, H) << "Handle should pass through";
+    EXPECT_EQ(MockName, Name) << "Name should pass through";
+    EXPECT_EQ(MockBool, ExportedSymbolsOnly) << "Flag should pass through";
+    LastCalled = "findSymbolIn";
+    MockSymbol = JITSymbol(122, llvm::JITSymbolFlags::None);
+    return MockSymbol;
+  }
+  void expectFindSymbolIn(ObjSetHandleT H, const std::string &Name,
+                          bool ExportedSymbolsOnly) {
+    MockObjSetHandle = H;
+    MockName = Name;
+    MockBool = ExportedSymbolsOnly;
+  }
+  void verifyFindSymbolIn(llvm::orc::JITSymbol Returned) {
+    EXPECT_EQ("findSymbolIn", LastCalled);
+    EXPECT_EQ(MockSymbol.getAddress(), Returned.getAddress())
+        << "Return should pass through";
+    resetExpectations();
+  }
+
+  void emitAndFinalize(ObjSetHandleT H) {
+    EXPECT_EQ(MockObjSetHandle, H) << "Handle should pass through";
+    LastCalled = "emitAndFinalize";
+  }
+  void expectEmitAndFinalize(ObjSetHandleT H) { MockObjSetHandle = H; }
+  void verifyEmitAndFinalize() {
+    EXPECT_EQ("emitAndFinalize", LastCalled);
+    resetExpectations();
+  }
+
+  void mapSectionAddress(ObjSetHandleT H, const void *LocalAddress,
+                         TargetAddress TargetAddr) {
+    EXPECT_EQ(MockObjSetHandle, H);
+    EXPECT_EQ(MockLocalAddress, LocalAddress);
+    EXPECT_EQ(MockTargetAddress, TargetAddr);
+    LastCalled = "mapSectionAddress";
+  }
+  void expectMapSectionAddress(ObjSetHandleT H, const void *LocalAddress,
+                               TargetAddress TargetAddr) {
+    MockObjSetHandle = H;
+    MockLocalAddress = LocalAddress;
+    MockTargetAddress = TargetAddr;
+  }
+  void verifyMapSectionAddress() {
+    EXPECT_EQ("mapSectionAddress", LastCalled);
+    resetExpectations();
+  }
+
+  template <typename OwningMBSet>
+  void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) {
+    EXPECT_EQ(MockObjSetHandle, H);
+    EXPECT_EQ(MockBufferSet, *MBs);
+    LastCalled = "takeOwnershipOfBuffers";
+  }
+  void expectTakeOwnershipOfBuffers(ObjSetHandleT H, MockMemoryBufferSet *MBs) {
+    MockObjSetHandle = H;
+    MockBufferSet = *MBs;
+  }
+  void verifyTakeOwnershipOfBuffers() {
+    EXPECT_EQ("takeOwnershipOfBuffers", LastCalled);
+    resetExpectations();
+  }
+
+private:
+  // Backing fields for remembering parameter/return values
+  std::string LastCalled;
+  MockMemoryManager MockManager;
+  MockSymbolResolver MockResolver;
+  std::vector<MockObjectFile> MockObjects;
+  ObjSetHandleT MockObjSetHandle;
+  std::string MockName;
+  bool MockBool;
+  JITSymbol MockSymbol;
+  const void *MockLocalAddress;
+  TargetAddress MockTargetAddress;
+  MockMemoryBufferSet MockBufferSet;
+
+  // Clear remembered parameters between calls
+  void resetExpectations() {
+    LastCalled = "nothing";
+    MockManager = 0;
+    MockResolver = 0;
+    MockObjects.clear();
+    MockObjSetHandle = 0;
+    MockName = "bogus";
+    MockSymbol = JITSymbol(nullptr);
+    MockLocalAddress = nullptr;
+    MockTargetAddress = 0;
+    MockBufferSet = 0;
+  }
+};
+
+// Test each operation on ObjectTransformLayer.
+TEST(ObjectTransformLayerTest, Main) {
+  MockBaseLayer M;
+
+  // Create one object transform layer using a transform (as a functor)
+  // that allocates new objects, and deals in unique pointers.
+  ObjectTransformLayer<MockBaseLayer, AllocatingTransform> T1(M);
+
+  // Create a second object transform layer using a transform (as a lambda)
+  // that mutates objects in place, and deals in naked pointers
+  ObjectTransformLayer<MockBaseLayer,
+                       std::function<MockObjectFile *(MockObjectFile *)>>
+  T2(M, [](MockObjectFile *Obj) {
+    ++(*Obj);
+    return Obj;
+  });
+
+  // Instantiate some mock objects to use below
+  MockObjectFile MockObject1 = 211;
+  MockObjectFile MockObject2 = 222;
+  MockMemoryManager MockManager = 233;
+  MockSymbolResolver MockResolver = 244;
+
+  // Test addObjectSet with T1 (allocating, unique pointers)
+  std::vector<std::unique_ptr<MockObjectFile>> Objs1;
+  Objs1.push_back(llvm::make_unique<MockObjectFile>(MockObject1));
+  Objs1.push_back(llvm::make_unique<MockObjectFile>(MockObject2));
+  auto MM = llvm::make_unique<MockMemoryManager>(MockManager);
+  auto SR = llvm::make_unique<MockSymbolResolver>(MockResolver);
+  M.expectAddObjectSet(Objs1, MM.get(), SR.get());
+  auto H = T1.addObjectSet(Objs1, std::move(MM), std::move(SR));
+  M.verifyAddObjectSet(H);
+
+  // Test addObjectSet with T2 (mutating, naked pointers)
+  llvm::SmallVector<MockObjectFile *, 2> Objs2;
+  Objs2.push_back(&MockObject1);
+  Objs2.push_back(&MockObject2);
+  M.expectAddObjectSet(Objs2, &MockManager, &MockResolver);
+  H = T2.addObjectSet(Objs2, &MockManager, &MockResolver);
+  M.verifyAddObjectSet(H);
+  EXPECT_EQ(212, MockObject1) << "Expected mutation";
+  EXPECT_EQ(223, MockObject2) << "Expected mutation";
+
+  // Test removeObjectSet
+  M.expectRemoveObjectSet(H);
+  T1.removeObjectSet(H);
+  M.verifyRemoveObjectSet();
+
+  // Test findSymbol
+  std::string Name = "foo";
+  bool ExportedOnly = true;
+  M.expectFindSymbol(Name, ExportedOnly);
+  JITSymbol Symbol = T2.findSymbol(Name, ExportedOnly);
+  M.verifyFindSymbol(Symbol);
+
+  // Test findSymbolIn
+  Name = "bar";
+  ExportedOnly = false;
+  M.expectFindSymbolIn(H, Name, ExportedOnly);
+  Symbol = T1.findSymbolIn(H, Name, ExportedOnly);
+  M.verifyFindSymbolIn(Symbol);
+
+  // Test emitAndFinalize
+  M.expectEmitAndFinalize(H);
+  T2.emitAndFinalize(H);
+  M.verifyEmitAndFinalize();
+
+  // Test mapSectionAddress
+  char Buffer[24];
+  TargetAddress MockAddress = 255;
+  M.expectMapSectionAddress(H, Buffer, MockAddress);
+  T1.mapSectionAddress(H, Buffer, MockAddress);
+  M.verifyMapSectionAddress();
+
+  // Test takeOwnershipOfBuffers, using unique pointer to buffer set
+  auto MockBufferSetPtr = llvm::make_unique<MockMemoryBufferSet>(366);
+  M.expectTakeOwnershipOfBuffers(H, MockBufferSetPtr.get());
+  T2.takeOwnershipOfBuffers(H, std::move(MockBufferSetPtr));
+  M.verifyTakeOwnershipOfBuffers();
+
+  // Test takeOwnershipOfBuffers, using naked pointer to buffer set
+  MockMemoryBufferSet MockBufferSet = 266;
+  M.expectTakeOwnershipOfBuffers(H, &MockBufferSet);
+  T1.takeOwnershipOfBuffers(H, &MockBufferSet);
+  M.verifyTakeOwnershipOfBuffers();
+
+  // Verify transform getter (non-const)
+  MockObjectFile Mutatee = 277;
+  MockObjectFile *Out = T2.getTransform()(&Mutatee);
+  EXPECT_EQ(&Mutatee, Out) << "Expected in-place transform";
+  EXPECT_EQ(278, Mutatee) << "Expected incrementing transform";
+
+  // Verify transform getter (const)
+  auto OwnedObj = llvm::make_unique<MockObjectFile>(288);
+  const auto &T1C = T1;
+  OwnedObj = T1C.getTransform()(std::move(OwnedObj));
+  EXPECT_EQ(289, *OwnedObj) << "Expected incrementing transform";
+}
+}
diff --git a/unittests/IR/IRBuilderTest.cpp b/unittests/IR/IRBuilderTest.cpp
index f189349..f3db68f 100644
--- a/unittests/IR/IRBuilderTest.cpp
+++ b/unittests/IR/IRBuilderTest.cpp
@@ -333,4 +333,39 @@ TEST_F(IRBuilderTest, CreateGlobalStringPtr) {
   EXPECT_TRUE(String2->getType()->getPointerAddressSpace() == 1);
   EXPECT_TRUE(String3->getType()->getPointerAddressSpace() == 2);
 }
+
+TEST_F(IRBuilderTest, DebugLoc) {
+  auto CalleeTy = FunctionType::get(Type::getVoidTy(Ctx),
+                                    /*isVarArg=*/false);
+  auto Callee =
+      Function::Create(CalleeTy, Function::ExternalLinkage, "", M.get());
+
+  DIBuilder DIB(*M);
+  auto File = DIB.createFile("tmp.cpp", "/");
+  auto CU = DIB.createCompileUnit(dwarf::DW_LANG_C_plus_plus_11, "tmp.cpp", "/",
+                                  "", true, "", 0);
+  auto SPType = DIB.createSubroutineType(File, DIB.getOrCreateTypeArray(None));
+  auto SP =
+      DIB.createFunction(CU, "foo", "foo", File, 1, SPType, false, true, 1);
+  DebugLoc DL1 = DILocation::get(Ctx, 2, 0, SP);
+  DebugLoc DL2 = DILocation::get(Ctx, 3, 0, SP);
+
+  auto BB2 = BasicBlock::Create(Ctx, "bb2", F);
+  auto Br = BranchInst::Create(BB2, BB);
+  Br->setDebugLoc(DL1);
+
+  IRBuilder<> Builder(Ctx);
+  Builder.SetInsertPoint(Br);
+  EXPECT_EQ(DL1, Builder.getCurrentDebugLocation());
+  auto Call1 = Builder.CreateCall(Callee, None);
+  EXPECT_EQ(DL1, Call1->getDebugLoc());
+
+  Call1->setDebugLoc(DL2);
+  Builder.SetInsertPoint(Call1->getParent(), Call1);
+  EXPECT_EQ(DL2, Builder.getCurrentDebugLocation());
+  auto Call2 = Builder.CreateCall(Callee, None);
+  EXPECT_EQ(DL2, Call2->getDebugLoc());
+
+  DIB.finalize();
+}
 }
diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp
index b255ba8..b58615c 100644
--- a/unittests/IR/MetadataTest.cpp
+++ b/unittests/IR/MetadataTest.cpp
@@ -16,6 +16,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Support/raw_ostream.h"
@@ -356,6 +357,10 @@ TEST_F(MDNodeTest, PrintFromFunction) {
 
   EXPECT_PRINTER_EQ("!0 = distinct !{}", N0->print(OS, &M));
   EXPECT_PRINTER_EQ("!1 = distinct !{}", N1->print(OS, &M));
+
+  ModuleSlotTracker MST(&M);
+  EXPECT_PRINTER_EQ("!0 = distinct !{}", N0->print(OS, MST));
+  EXPECT_PRINTER_EQ("!1 = distinct !{}", N1->print(OS, MST));
 }
 
 TEST_F(MDNodeTest, PrintFromMetadataAsValue) {
@@ -384,6 +389,14 @@ TEST_F(MDNodeTest, PrintFromMetadataAsValue) {
   EXPECT_PRINTER_EQ("!1", MAV1->printAsOperand(OS, false));
   EXPECT_PRINTER_EQ("metadata !0", MAV0->printAsOperand(OS, true));
   EXPECT_PRINTER_EQ("metadata !1", MAV1->printAsOperand(OS, true));
+
+  ModuleSlotTracker MST(&M);
+  EXPECT_PRINTER_EQ("!0 = distinct !{}", MAV0->print(OS, MST));
+  EXPECT_PRINTER_EQ("!1 = distinct !{}", MAV1->print(OS, MST));
+  EXPECT_PRINTER_EQ("!0", MAV0->printAsOperand(OS, false, MST));
+  EXPECT_PRINTER_EQ("!1", MAV1->printAsOperand(OS, false, MST));
+  EXPECT_PRINTER_EQ("metadata !0", MAV0->printAsOperand(OS, true, MST));
+  EXPECT_PRINTER_EQ("metadata !1", MAV1->printAsOperand(OS, true, MST));
 }
 #undef EXPECT_PRINTER_EQ
 
@@ -1691,6 +1704,40 @@ TEST_F(DINamespaceTest, get) {
   EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
 }
 
+typedef MetadataTest DIModuleTest;
+
+TEST_F(DIModuleTest, get) {
+  DIScope *Scope = getFile();
+  StringRef Name = "module";
+  StringRef ConfigMacro = "-DNDEBUG";
+  StringRef Includes = "-I.";
+  StringRef Sysroot = "/";
+
+  auto *N = DIModule::get(Context, Scope, Name, ConfigMacro, Includes, Sysroot);
+
+  EXPECT_EQ(dwarf::DW_TAG_module, N->getTag());
+  EXPECT_EQ(Scope, N->getScope());
+  EXPECT_EQ(Name, N->getName());
+  EXPECT_EQ(ConfigMacro, N->getConfigurationMacros());
+  EXPECT_EQ(Includes, N->getIncludePath());
+  EXPECT_EQ(Sysroot, N->getISysRoot());
+  EXPECT_EQ(N, DIModule::get(Context, Scope, Name,
+                             ConfigMacro, Includes, Sysroot));
+  EXPECT_NE(N, DIModule::get(Context, getFile(), Name,
+                             ConfigMacro, Includes, Sysroot));
+  EXPECT_NE(N, DIModule::get(Context, Scope, "other",
+                             ConfigMacro, Includes, Sysroot));
+  EXPECT_NE(N, DIModule::get(Context, Scope, Name,
+                             "other", Includes, Sysroot));
+  EXPECT_NE(N, DIModule::get(Context, Scope, Name,
+                             ConfigMacro, "other", Sysroot));
+  EXPECT_NE(N, DIModule::get(Context, Scope, Name,
+                             ConfigMacro, Includes, "other"));
+
+  TempDIModule Temp = N->clone();
+  EXPECT_EQ(N, MDNode::replaceWithUniqued(std::move(Temp)));
+}
+
 typedef MetadataTest DITemplateTypeParameterTest;
 
 TEST_F(DITemplateTypeParameterTest, get) {
diff --git a/unittests/IR/ValueTest.cpp b/unittests/IR/ValueTest.cpp
index 4dd0c2c..32d66a1 100644
--- a/unittests/IR/ValueTest.cpp
+++ b/unittests/IR/ValueTest.cpp
@@ -11,6 +11,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
@@ -106,4 +107,72 @@ TEST(GlobalTest, AlignDeath) {
 #endif
 #endif
 
+TEST(ValueTest, printSlots) {
+  // Check that Value::print() and Value::printAsOperand() work with and
+  // without a slot tracker.
+  LLVMContext C;
+
+  const char *ModuleString = "define void @f(i32 %x, i32 %y) {\n"
+                             "entry:\n"
+                             "  %0 = add i32 %y, 1\n"
+                             "  %1 = add i32 %y, 1\n"
+                             "  ret void\n"
+                             "}\n";
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M = parseAssemblyString(ModuleString, Err, C);
+
+  Function *F = M->getFunction("f");
+  ASSERT_TRUE(F);
+  ASSERT_FALSE(F->empty());
+  BasicBlock &BB = F->getEntryBlock();
+  ASSERT_EQ(3u, BB.size());
+
+  Instruction *I0 = BB.begin();
+  ASSERT_TRUE(I0);
+  Instruction *I1 = ++BB.begin();
+  ASSERT_TRUE(I1);
+
+  ModuleSlotTracker MST(M.get());
+
+#define CHECK_PRINT(INST, STR)                                                 \
+  do {                                                                         \
+    {                                                                          \
+      std::string S;                                                           \
+      raw_string_ostream OS(S);                                                \
+      INST->print(OS);                                                         \
+      EXPECT_EQ(STR, OS.str());                                                \
+    }                                                                          \
+    {                                                                          \
+      std::string S;                                                           \
+      raw_string_ostream OS(S);                                                \
+      INST->print(OS, MST);                                                    \
+      EXPECT_EQ(STR, OS.str());                                                \
+    }                                                                          \
+  } while (false)
+  CHECK_PRINT(I0, "  %0 = add i32 %y, 1");
+  CHECK_PRINT(I1, "  %1 = add i32 %y, 1");
+#undef CHECK_PRINT
+
+#define CHECK_PRINT_AS_OPERAND(INST, TYPE, STR)                                \
+  do {                                                                         \
+    {                                                                          \
+      std::string S;                                                           \
+      raw_string_ostream OS(S);                                                \
+      INST->printAsOperand(OS, TYPE);                                          \
+      EXPECT_EQ(StringRef(STR), StringRef(OS.str()));                          \
+    }                                                                          \
+    {                                                                          \
+      std::string S;                                                           \
+      raw_string_ostream OS(S);                                                \
+      INST->printAsOperand(OS, TYPE, MST);                                     \
+      EXPECT_EQ(StringRef(STR), StringRef(OS.str()));                          \
+    }                                                                          \
+  } while (false)
+  CHECK_PRINT_AS_OPERAND(I0, false, "%0");
+  CHECK_PRINT_AS_OPERAND(I1, false, "%1");
+  CHECK_PRINT_AS_OPERAND(I0, true, "i32 %0");
+  CHECK_PRINT_AS_OPERAND(I1, true, "i32 %1");
+#undef CHECK_PRINT_AS_OPERAND
+}
+
 } // end anonymous namespace
diff --git a/unittests/Option/OptionParsingTest.cpp b/unittests/Option/OptionParsingTest.cpp
index 521009a..55cf8a9 100644
--- a/unittests/Option/OptionParsingTest.cpp
+++ b/unittests/Option/OptionParsingTest.cpp
@@ -67,27 +67,26 @@ const char *Args[] = {
 TEST(Option, OptionParsing) {
   TestOptTable T;
   unsigned MAI, MAC;
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(Args), std::end(Args), MAI, MAC));
+  InputArgList AL = T.ParseArgs(Args, MAI, MAC);
 
   // Check they all exist.
-  EXPECT_TRUE(AL->hasArg(OPT_A));
-  EXPECT_TRUE(AL->hasArg(OPT_B));
-  EXPECT_TRUE(AL->hasArg(OPT_C));
-  EXPECT_TRUE(AL->hasArg(OPT_D));
-  EXPECT_TRUE(AL->hasArg(OPT_E));
-  EXPECT_TRUE(AL->hasArg(OPT_F));
-  EXPECT_TRUE(AL->hasArg(OPT_G));
+  EXPECT_TRUE(AL.hasArg(OPT_A));
+  EXPECT_TRUE(AL.hasArg(OPT_B));
+  EXPECT_TRUE(AL.hasArg(OPT_C));
+  EXPECT_TRUE(AL.hasArg(OPT_D));
+  EXPECT_TRUE(AL.hasArg(OPT_E));
+  EXPECT_TRUE(AL.hasArg(OPT_F));
+  EXPECT_TRUE(AL.hasArg(OPT_G));
 
   // Check the values.
-  EXPECT_EQ(AL->getLastArgValue(OPT_B), "hi");
-  EXPECT_EQ(AL->getLastArgValue(OPT_C), "bye");
-  EXPECT_EQ(AL->getLastArgValue(OPT_D), "adena");
-  std::vector<std::string> Es = AL->getAllArgValues(OPT_E);
+  EXPECT_EQ(AL.getLastArgValue(OPT_B), "hi");
+  EXPECT_EQ(AL.getLastArgValue(OPT_C), "bye");
+  EXPECT_EQ(AL.getLastArgValue(OPT_D), "adena");
+  std::vector<std::string> Es = AL.getAllArgValues(OPT_E);
   EXPECT_EQ(Es[0], "apple");
   EXPECT_EQ(Es[1], "bloom");
-  EXPECT_EQ(AL->getLastArgValue(OPT_F), "42");
-  std::vector<std::string> Gs = AL->getAllArgValues(OPT_G);
+  EXPECT_EQ(AL.getLastArgValue(OPT_F), "42");
+  std::vector<std::string> Gs = AL.getAllArgValues(OPT_G);
   EXPECT_EQ(Gs[0], "chuu");
   EXPECT_EQ(Gs[1], "2");
 
@@ -98,11 +97,11 @@ TEST(Option, OptionParsing) {
   EXPECT_NE(Help.find("-A"), std::string::npos);
 
   // Test aliases.
-  arg_iterator Cs = AL->filtered_begin(OPT_C);
-  ASSERT_NE(Cs, AL->filtered_end());
+  arg_iterator Cs = AL.filtered_begin(OPT_C);
+  ASSERT_NE(Cs, AL.filtered_end());
   EXPECT_EQ(StringRef((*Cs)->getValue()), "desu");
   ArgStringList ASL;
-  (*Cs)->render(*AL, ASL);
+  (*Cs)->render(AL, ASL);
   ASSERT_EQ(ASL.size(), 2u);
   EXPECT_EQ(StringRef(ASL[0]), "-C");
   EXPECT_EQ(StringRef(ASL[1]), "desu");
@@ -111,30 +110,29 @@ TEST(Option, OptionParsing) {
 TEST(Option, ParseWithFlagExclusions) {
   TestOptTable T;
   unsigned MAI, MAC;
-  std::unique_ptr<InputArgList> AL;
 
   // Exclude flag3 to avoid parsing as OPT_SLASH_C.
-  AL.reset(T.ParseArgs(std::begin(Args), std::end(Args), MAI, MAC,
-                       /*FlagsToInclude=*/0,
-                       /*FlagsToExclude=*/OptFlag3));
-  EXPECT_TRUE(AL->hasArg(OPT_A));
-  EXPECT_TRUE(AL->hasArg(OPT_C));
-  EXPECT_FALSE(AL->hasArg(OPT_SLASH_C));
+  InputArgList AL = T.ParseArgs(Args, MAI, MAC,
+                                /*FlagsToInclude=*/0,
+                                /*FlagsToExclude=*/OptFlag3);
+  EXPECT_TRUE(AL.hasArg(OPT_A));
+  EXPECT_TRUE(AL.hasArg(OPT_C));
+  EXPECT_FALSE(AL.hasArg(OPT_SLASH_C));
 
   // Exclude flag1 to avoid parsing as OPT_C.
-  AL.reset(T.ParseArgs(std::begin(Args), std::end(Args), MAI, MAC,
-                       /*FlagsToInclude=*/0,
-                       /*FlagsToExclude=*/OptFlag1));
-  EXPECT_TRUE(AL->hasArg(OPT_B));
-  EXPECT_FALSE(AL->hasArg(OPT_C));
-  EXPECT_TRUE(AL->hasArg(OPT_SLASH_C));
+  AL = T.ParseArgs(Args, MAI, MAC,
+                   /*FlagsToInclude=*/0,
+                   /*FlagsToExclude=*/OptFlag1);
+  EXPECT_TRUE(AL.hasArg(OPT_B));
+  EXPECT_FALSE(AL.hasArg(OPT_C));
+  EXPECT_TRUE(AL.hasArg(OPT_SLASH_C));
 
   const char *NewArgs[] = { "/C", "foo", "--C=bar" };
-  AL.reset(T.ParseArgs(std::begin(NewArgs), std::end(NewArgs), MAI, MAC));
-  EXPECT_TRUE(AL->hasArg(OPT_SLASH_C));
-  EXPECT_TRUE(AL->hasArg(OPT_C));
-  EXPECT_EQ(AL->getLastArgValue(OPT_SLASH_C), "foo");
-  EXPECT_EQ(AL->getLastArgValue(OPT_C), "bar");
+  AL = T.ParseArgs(NewArgs, MAI, MAC);
+  EXPECT_TRUE(AL.hasArg(OPT_SLASH_C));
+  EXPECT_TRUE(AL.hasArg(OPT_C));
+  EXPECT_EQ(AL.getLastArgValue(OPT_SLASH_C), "foo");
+  EXPECT_EQ(AL.getLastArgValue(OPT_C), "bar");
 }
 
 TEST(Option, ParseAliasInGroup) {
@@ -142,9 +140,8 @@ TEST(Option, ParseAliasInGroup) {
   unsigned MAI, MAC;
 
   const char *MyArgs[] = { "-I" };
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(MyArgs), std::end(MyArgs), MAI, MAC));
-  EXPECT_TRUE(AL->hasArg(OPT_H));
+  InputArgList AL = T.ParseArgs(MyArgs, MAI, MAC);
+  EXPECT_TRUE(AL.hasArg(OPT_H));
 }
 
 TEST(Option, AliasArgs) {
@@ -152,11 +149,10 @@ TEST(Option, AliasArgs) {
   unsigned MAI, MAC;
 
   const char *MyArgs[] = { "-J", "-Joo" };
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(MyArgs), std::end(MyArgs), MAI, MAC));
-  EXPECT_TRUE(AL->hasArg(OPT_B));
-  EXPECT_EQ(AL->getAllArgValues(OPT_B)[0], "foo");
-  EXPECT_EQ(AL->getAllArgValues(OPT_B)[1], "bar");
+  InputArgList AL = T.ParseArgs(MyArgs, MAI, MAC);
+  EXPECT_TRUE(AL.hasArg(OPT_B));
+  EXPECT_EQ(AL.getAllArgValues(OPT_B)[0], "foo");
+  EXPECT_EQ(AL.getAllArgValues(OPT_B)[1], "bar");
 }
 
 TEST(Option, IgnoreCase) {
@@ -164,10 +160,9 @@ TEST(Option, IgnoreCase) {
   unsigned MAI, MAC;
 
   const char *MyArgs[] = { "-a", "-joo" };
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(MyArgs), std::end(MyArgs), MAI, MAC));
-  EXPECT_TRUE(AL->hasArg(OPT_A));
-  EXPECT_TRUE(AL->hasArg(OPT_B));
+  InputArgList AL = T.ParseArgs(MyArgs, MAI, MAC);
+  EXPECT_TRUE(AL.hasArg(OPT_A));
+  EXPECT_TRUE(AL.hasArg(OPT_B));
 }
 
 TEST(Option, DoNotIgnoreCase) {
@@ -175,10 +170,9 @@ TEST(Option, DoNotIgnoreCase) {
   unsigned MAI, MAC;
 
   const char *MyArgs[] = { "-a", "-joo" };
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(MyArgs), std::end(MyArgs), MAI, MAC));
-  EXPECT_FALSE(AL->hasArg(OPT_A));
-  EXPECT_FALSE(AL->hasArg(OPT_B));
+  InputArgList AL = T.ParseArgs(MyArgs, MAI, MAC);
+  EXPECT_FALSE(AL.hasArg(OPT_A));
+  EXPECT_FALSE(AL.hasArg(OPT_B));
 }
 
 TEST(Option, SlurpEmpty) {
@@ -186,11 +180,10 @@ TEST(Option, SlurpEmpty) {
   unsigned MAI, MAC;
 
   const char *MyArgs[] = { "-A", "-slurp" };
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(MyArgs), std::end(MyArgs), MAI, MAC));
-  EXPECT_TRUE(AL->hasArg(OPT_A));
-  EXPECT_TRUE(AL->hasArg(OPT_Slurp));
-  EXPECT_EQ(AL->getAllArgValues(OPT_Slurp).size(), 0U);
+  InputArgList AL = T.ParseArgs(MyArgs, MAI, MAC);
+  EXPECT_TRUE(AL.hasArg(OPT_A));
+  EXPECT_TRUE(AL.hasArg(OPT_Slurp));
+  EXPECT_EQ(AL.getAllArgValues(OPT_Slurp).size(), 0U);
 }
 
 TEST(Option, Slurp) {
@@ -198,16 +191,15 @@ TEST(Option, Slurp) {
   unsigned MAI, MAC;
 
   const char *MyArgs[] = { "-A", "-slurp", "-B", "--", "foo" };
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(MyArgs), std::end(MyArgs), MAI, MAC));
-  EXPECT_EQ(AL->size(), 2U);
-  EXPECT_TRUE(AL->hasArg(OPT_A));
-  EXPECT_FALSE(AL->hasArg(OPT_B));
-  EXPECT_TRUE(AL->hasArg(OPT_Slurp));
-  EXPECT_EQ(AL->getAllArgValues(OPT_Slurp).size(), 3U);
-  EXPECT_EQ(AL->getAllArgValues(OPT_Slurp)[0], "-B");
-  EXPECT_EQ(AL->getAllArgValues(OPT_Slurp)[1], "--");
-  EXPECT_EQ(AL->getAllArgValues(OPT_Slurp)[2], "foo");
+  InputArgList AL = T.ParseArgs(MyArgs, MAI, MAC);
+  EXPECT_EQ(AL.size(), 2U);
+  EXPECT_TRUE(AL.hasArg(OPT_A));
+  EXPECT_FALSE(AL.hasArg(OPT_B));
+  EXPECT_TRUE(AL.hasArg(OPT_Slurp));
+  EXPECT_EQ(AL.getAllArgValues(OPT_Slurp).size(), 3U);
+  EXPECT_EQ(AL.getAllArgValues(OPT_Slurp)[0], "-B");
+  EXPECT_EQ(AL.getAllArgValues(OPT_Slurp)[1], "--");
+  EXPECT_EQ(AL.getAllArgValues(OPT_Slurp)[2], "foo");
 }
 
 TEST(Option, FlagAliasToJoined) {
@@ -216,10 +208,9 @@ TEST(Option, FlagAliasToJoined) {
 
   // Check that a flag alias provides an empty argument to a joined option.
   const char *MyArgs[] = { "-K" };
-  std::unique_ptr<InputArgList> AL(
-      T.ParseArgs(std::begin(MyArgs), std::end(MyArgs), MAI, MAC));
-  EXPECT_EQ(AL->size(), 1U);
-  EXPECT_TRUE(AL->hasArg(OPT_B));
-  EXPECT_EQ(AL->getAllArgValues(OPT_B).size(), 1U);
-  EXPECT_EQ(AL->getAllArgValues(OPT_B)[0], "");
+  InputArgList AL = T.ParseArgs(MyArgs, MAI, MAC);
+  EXPECT_EQ(AL.size(), 1U);
+  EXPECT_TRUE(AL.hasArg(OPT_B));
+  EXPECT_EQ(AL.getAllArgValues(OPT_B).size(), 1U);
+  EXPECT_EQ(AL.getAllArgValues(OPT_B)[0], "");
 }
diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp
index 26ea0e4..2cedd59 100644
--- a/unittests/ProfileData/InstrProfTest.cpp
+++ b/unittests/ProfileData/InstrProfTest.cpp
@@ -68,6 +68,7 @@ TEST_F(InstrProfTest, write_and_read_one_function) {
 
 TEST_F(InstrProfTest, get_function_counts) {
   Writer.addFunctionCounts("foo", 0x1234, {1, 2});
+  Writer.addFunctionCounts("foo", 0x1235, {3, 4});
   auto Profile = Writer.writeBuffer();
   readProfile(std::move(Profile));
 
@@ -77,6 +78,11 @@ TEST_F(InstrProfTest, get_function_counts) {
   ASSERT_EQ(1U, Counts[0]);
   ASSERT_EQ(2U, Counts[1]);
 
+  ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1235, Counts)));
+  ASSERT_EQ(2U, Counts.size());
+  ASSERT_EQ(3U, Counts[0]);
+  ASSERT_EQ(4U, Counts[1]);
+
   std::error_code EC;
   EC = Reader->getFunctionCounts("foo", 0x5678, Counts);
   ASSERT_TRUE(ErrorEquals(instrprof_error::hash_mismatch, EC));
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index 18d3ca6..e267149 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -415,4 +415,39 @@ TEST_F(CloneFunc, DebugIntrinsics) {
   }
 }
 
+class CloneModule : public ::testing::Test {
+protected:
+  void SetUp() override {
+    SetupModule();
+    CreateOldModule();
+    CreateNewModule();
+  }
+
+  void SetupModule() { OldM = new Module("", C); }
+
+  void CreateOldModule() {
+    IRBuilder<> IBuilder(C);
+
+    auto *FuncType = FunctionType::get(Type::getVoidTy(C), false);
+    auto *PersFn = Function::Create(FuncType, GlobalValue::ExternalLinkage,
+                                    "persfn", OldM);
+    auto *F =
+        Function::Create(FuncType, GlobalValue::PrivateLinkage, "f", OldM);
+    F->setPersonalityFn(PersFn);
+    auto *Entry = BasicBlock::Create(C, "", F);
+    IBuilder.SetInsertPoint(Entry);
+    IBuilder.CreateRetVoid();
+  }
+
+  void CreateNewModule() { NewM = llvm::CloneModule(OldM); }
+
+  LLVMContext C;
+  Module *OldM;
+  Module *NewM;
+};
+
+TEST_F(CloneModule, Verify) {
+  EXPECT_FALSE(verifyModule(*NewM));
+}
+
 }
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index fa6fd43..ae1cc0c 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2143,7 +2143,8 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
         Operator->getName() != "tblockaddress" &&
         Operator->getName() != "tglobaladdr" &&
         Operator->getName() != "bb" &&
-        Operator->getName() != "vt")
+        Operator->getName() != "vt" &&
+        Operator->getName() != "mcsym")
       error("Cannot use '" + Operator->getName() + "' in an output pattern!");
   }
 
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index a8423a9..7506e91 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1094,6 +1094,8 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
      << "const TargetRegisterClass *RC) const override;\n"
      << "  const int *getRegUnitPressureSets("
      << "unsigned RegUnit) const override;\n"
+     << "  ArrayRef<const char *> getRegMaskNames() const override;\n"
+     << "  ArrayRef<const uint32_t *> getRegMasks() const override;\n"
      << "};\n\n";
 
   const auto &RegisterClasses = RegBank.getRegClasses();
@@ -1445,6 +1447,26 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
   }
   OS << "\n\n";
 
+  OS << "ArrayRef<const uint32_t *> " << ClassName
+     << "::getRegMasks() const {\n";
+  OS << "  static const uint32_t *Masks[] = {\n";
+  for (Record *CSRSet : CSRSets)
+    OS << "    " << CSRSet->getName() << "_RegMask, \n";
+  OS << "    nullptr\n  };\n";
+  OS << "  return ArrayRef<const uint32_t *>(Masks, (size_t)" << CSRSets.size()
+     << ");\n";
+  OS << "}\n\n";
+
+  OS << "ArrayRef<const char *> " << ClassName
+     << "::getRegMaskNames() const {\n";
+  OS << "  static const char *Names[] = {\n";
+  for (Record *CSRSet : CSRSets)
+    OS << "    " << '"' << CSRSet->getName() << '"' << ",\n";
+  OS << "    nullptr\n  };\n";
+  OS << "  return ArrayRef<const char *>(Names, (size_t)" << CSRSets.size()
+     << ");\n";
+  OS << "}\n\n";
+
   OS << "} // End llvm namespace\n";
   OS << "#endif // GET_REGINFO_TARGET_DESC\n\n";
 }
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index dde21c6..efcb0c8 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -1027,9 +1027,12 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("GR32_NOAX",           TYPE_Rv)
   TYPE("GR64_NOAX",           TYPE_R64)
   TYPE("vx32mem",             TYPE_M32)
+  TYPE("vx32xmem",            TYPE_M32)
   TYPE("vy32mem",             TYPE_M32)
+  TYPE("vy32xmem",            TYPE_M32)
   TYPE("vz32mem",             TYPE_M32)
   TYPE("vx64mem",             TYPE_M64)
+  TYPE("vx64xmem",            TYPE_M64)
   TYPE("vy64mem",             TYPE_M64)
   TYPE("vy64xmem",            TYPE_M64)
   TYPE("vz64mem",             TYPE_M64)
@@ -1213,9 +1216,12 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
   ENCODING("opaque80mem",     ENCODING_RM)
   ENCODING("opaque512mem",    ENCODING_RM)
   ENCODING("vx32mem",         ENCODING_RM)
+  ENCODING("vx32xmem",        ENCODING_RM)
   ENCODING("vy32mem",         ENCODING_RM)
+  ENCODING("vy32xmem",        ENCODING_RM)
   ENCODING("vz32mem",         ENCODING_RM)
   ENCODING("vx64mem",         ENCODING_RM)
+  ENCODING("vx64xmem",        ENCODING_RM)
   ENCODING("vy64mem",         ENCODING_RM)
   ENCODING("vy64xmem",        ENCODING_RM)
   ENCODING("vz64mem",         ENCODING_RM)
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index 1d51c1c..d49d0c0 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -24,7 +24,7 @@ class TestingConfig:
 
         pass_vars = ['LIBRARY_PATH', 'LD_LIBRARY_PATH', 'SYSTEMROOT', 'TERM',
                      'LD_PRELOAD', 'ASAN_OPTIONS', 'UBSAN_OPTIONS',
-                     'LSAN_OPTIONS']
+                     'LSAN_OPTIONS', 'ADB', 'ADB_SERIAL']
         for var in pass_vars:
             val = os.environ.get(var, '')
             # Check for empty string as some variables such as LD_PRELOAD cannot be empty
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index 3cb868b..04127c8 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -38,9 +38,9 @@ BuildDir="`pwd`"
 BuildTriple=""
 
 function usage() {
-    echo "usage: `basename $0` -release X.Y -rc NUM [OPTIONS]"
+    echo "usage: `basename $0` -release X.Y.Z -rc NUM [OPTIONS]"
     echo ""
-    echo " -release X.Y         The release number to test."
+    echo " -release X.Y.Z       The release version to test."
     echo " -rc NUM              The pre-release candidate number."
     echo " -final               The final release candidate."
     echo " -triple TRIPLE       The target triple for this machine."
@@ -48,10 +48,7 @@ function usage() {
     echo " -build-dir DIR       Directory to perform testing in. [default: pwd]"
     echo " -no-checkout         Don't checkout the sources from SVN."
     echo " -no-64bit            Don't test the 64-bit version. [default: yes]"
-    echo " -enable-ada          Build Ada. [default: disable]"
     echo " -disable-clang       Do not test clang. [default: enable]"
-    echo " -enable-fortran      Enable Fortran build. [default: disable]"
-    echo " -disable-objc        Disable ObjC build. [default: enable]"
     echo " -test-debug          Test the debug build. [default: no]"
     echo " -test-asserts        Test with asserts on. [default: no]"
     echo " -no-compare-files    Don't test that phase 2 and 3 files are identical."
@@ -274,7 +271,7 @@ function configure_llvmCore() {
         --disable-timestamps \
         $build_triple_option"
     env CC="$c_compiler" CXX="$cxx_compiler" \
-    $BuildDir/llvm.src/configure --prefix=$InstallDir \
+        $BuildDir/llvm.src/configure --prefix=$InstallDir \
         --enable-optimized=$Optimized \
         --enable-assertions=$Assertions \
         --disable-timestamps \
-- 
cgit v1.1


From e7bcad327814a78ecb8d5f5545d2e3df84c67a5c Mon Sep 17 00:00:00 2001
From: dim <dim@FreeBSD.org>
Date: Sun, 5 Jul 2015 14:23:59 +0000
Subject: Vendor import of clang trunk r241361:
 https://llvm.org/svn/llvm-project/cfe/trunk@241361

---
 docs/AddressSanitizer.rst                          |  146 +-
 docs/AttributeReference.rst                        | 1115 +----------
 docs/CMakeLists.txt                                |    3 +
 docs/ClangFormatStyleOptions.rst                   |   19 +-
 docs/CommandGuide/clang.rst                        |  484 +++++
 docs/CommandGuide/index.rst                        |   17 +
 docs/DriverInternals.rst                           |    6 +-
 docs/LeakSanitizer.rst                             |    3 +-
 docs/LibASTMatchersReference.html                  |   10 +
 docs/Makefile                                      |    1 -
 docs/ObjectiveCLiterals.rst                        |   62 +-
 docs/SafeStack.rst                                 |  120 +-
 docs/conf.py                                       |   40 +-
 docs/index.rst                                     |    1 +
 docs/tools/Makefile                                |  116 --
 docs/tools/clang.pod                               |  614 ------
 docs/tools/manpage.css                             |  256 ---
 examples/analyzer-plugin/MainCallChecker.cpp       |    5 +-
 include/clang-c/Index.h                            |   11 +-
 include/clang/AST/ASTContext.h                     |   14 +
 include/clang/AST/ASTMutationListener.h            |   10 +
 include/clang/AST/DataRecursiveASTVisitor.h        |   20 +-
 include/clang/AST/EvaluatedExprVisitor.h           |    6 +-
 include/clang/AST/ExprObjC.h                       |    2 +-
 include/clang/AST/ExternalASTSource.h              |   14 +
 include/clang/AST/Mangle.h                         |    4 +
 include/clang/AST/NSAPI.h                          |    3 +-
 include/clang/AST/OpenMPClause.h                   |   88 +
 include/clang/AST/RecursiveASTVisitor.h            |   24 +-
 include/clang/AST/StmtIterator.h                   |   10 +-
 include/clang/AST/StmtOpenMP.h                     |  115 ++
 include/clang/AST/Type.h                           |    1 +
 include/clang/ASTMatchers/ASTMatchers.h            |   17 +
 include/clang/Analysis/Analyses/FormatString.h     |   25 +-
 include/clang/Basic/Attr.td                        |   22 +-
 include/clang/Basic/AttrDocs.td                    |  116 ++
 include/clang/Basic/BuiltinsARM.def                |    8 +-
 include/clang/Basic/BuiltinsNVPTX.def              |    2 +-
 include/clang/Basic/BuiltinsPPC.def                |   12 +
 include/clang/Basic/BuiltinsX86.def                |  117 +-
 include/clang/Basic/Diagnostic.h                   |    8 +
 include/clang/Basic/DiagnosticCommonKinds.td       |   22 +-
 include/clang/Basic/DiagnosticSemaKinds.td         |   59 +-
 include/clang/Basic/IdentifierTable.h              |   13 -
 include/clang/Basic/LangOptions.def                |    1 +
 include/clang/Basic/LangOptions.h                  |    2 +
 include/clang/Basic/Module.h                       |    3 +
 include/clang/Basic/OpenMPKinds.def                |   13 +
 include/clang/Basic/OpenMPKinds.h                  |    8 +
 include/clang/Basic/Specifiers.h                   |    3 +-
 include/clang/Basic/StmtNodes.td                   |    2 +
 include/clang/Basic/TargetInfo.h                   |    8 +
 include/clang/Basic/TokenKinds.def                 |    9 +-
 include/clang/Basic/TypeTraits.h                   |    3 +-
 include/clang/CodeGen/CodeGenABITypes.h            |    6 +-
 include/clang/CodeGen/ModuleBuilder.h              |    4 +
 include/clang/Driver/CC1Options.td                 |    4 +
 include/clang/Driver/CLCompatOptions.td            |   61 +-
 include/clang/Driver/Compilation.h                 |    5 +-
 include/clang/Driver/Driver.h                      |    6 +-
 include/clang/Driver/Job.h                         |   63 +-
 include/clang/Driver/Options.td                    |    7 +
 include/clang/Driver/SanitizerArgs.h               |    5 +-
 include/clang/Format/Format.h                      |  561 +++---
 include/clang/Lex/ExternalPreprocessorSource.h     |    7 +-
 include/clang/Lex/HeaderSearch.h                   |   20 +-
 include/clang/Lex/ModuleMap.h                      |    8 +-
 include/clang/Lex/Preprocessor.h                   |    8 +-
 include/clang/Parse/Parser.h                       |    7 +-
 include/clang/Sema/DeclSpec.h                      |   16 +-
 include/clang/Sema/Sema.h                          |   78 +-
 include/clang/Serialization/ASTBitCodes.h          |    2 +
 include/clang/Serialization/ASTReader.h            |    6 +-
 include/clang/Serialization/ASTWriter.h            |    8 +
 include/clang/Serialization/Module.h               |    3 +-
 .../StaticAnalyzer/Core/BugReporter/BugReporter.h  |    2 +-
 .../Core/PathSensitive/CheckerContext.h            |    4 +-
 .../Core/PathSensitive/CheckerHelpers.h            |    7 +-
 lib/ARCMigrate/ObjCMT.cpp                          |    4 +-
 lib/AST/ASTContext.cpp                             |   21 +
 lib/AST/ASTDumper.cpp                              |   11 +-
 lib/AST/DeclBase.cpp                               |    1 +
 lib/AST/DeclPrinter.cpp                            |    7 +-
 lib/AST/Expr.cpp                                   |    8 +-
 lib/AST/ExprConstant.cpp                           |    7 +
 lib/AST/ExternalASTSource.cpp                      |   10 +
 lib/AST/ItaniumMangle.cpp                          |   10 +-
 lib/AST/MicrosoftMangle.cpp                        |   39 +-
 lib/AST/NSAPI.cpp                                  |    3 +-
 lib/AST/ParentMap.cpp                              |   12 +-
 lib/AST/Stmt.cpp                                   |   64 +
 lib/AST/StmtIterator.cpp                           |    4 +-
 lib/AST/StmtPrinter.cpp                            |   26 +
 lib/AST/StmtProfile.cpp                            |   18 +-
 lib/AST/Type.cpp                                   |    5 +
 lib/AST/TypePrinter.cpp                            |   12 +-
 lib/ASTMatchers/ASTMatchersInternal.cpp            |    4 +-
 lib/ASTMatchers/Dynamic/Registry.cpp               |    1 +
 lib/Analysis/AnalysisDeclContext.cpp               |    6 +-
 lib/Analysis/CFG.cpp                               |   11 +-
 lib/Analysis/CallGraph.cpp                         |    6 +-
 lib/Analysis/LiveVariables.cpp                     |    9 +-
 lib/Analysis/PrintfFormatString.cpp                |   62 +
 lib/Analysis/PseudoConstantAnalysis.cpp            |    6 +-
 lib/Basic/Diagnostic.cpp                           |   21 +
 lib/Basic/IdentifierTable.cpp                      |   11 +-
 lib/Basic/Module.cpp                               |    2 +-
 lib/Basic/OpenMPKinds.cpp                          |   16 +
 lib/Basic/TargetInfo.cpp                           |    1 +
 lib/Basic/Targets.cpp                              |  265 +--
 lib/Basic/VirtualFileSystem.cpp                    |   14 -
 lib/CodeGen/BackendUtil.cpp                        |    6 +-
 lib/CodeGen/CGBuiltin.cpp                          |  339 +++-
 lib/CodeGen/CGCXXABI.cpp                           |    7 +-
 lib/CodeGen/CGCXXABI.h                             |    2 +-
 lib/CodeGen/CGCall.cpp                             |   41 +-
 lib/CodeGen/CGClass.cpp                            |   22 +-
 lib/CodeGen/CGDebugInfo.cpp                        |   57 +-
 lib/CodeGen/CGDebugInfo.h                          |   10 +
 lib/CodeGen/CGDecl.cpp                             |   10 +-
 lib/CodeGen/CGExpr.cpp                             |   15 +-
 lib/CodeGen/CGExprCXX.cpp                          |    3 +-
 lib/CodeGen/CGExprConstant.cpp                     |    2 +-
 lib/CodeGen/CGExprScalar.cpp                       |    8 +
 lib/CodeGen/CGObjC.cpp                             |   34 +-
 lib/CodeGen/CGOpenMPRuntime.cpp                    |  366 +++-
 lib/CodeGen/CGOpenMPRuntime.h                      |   80 +-
 lib/CodeGen/CGStmt.cpp                             |    8 +-
 lib/CodeGen/CGStmtOpenMP.cpp                       |  133 +-
 lib/CodeGen/CGVTables.cpp                          |   27 +-
 lib/CodeGen/CodeGenABITypes.cpp                    |    7 +-
 lib/CodeGen/CodeGenAction.cpp                      |    8 +-
 lib/CodeGen/CodeGenFunction.cpp                    |   13 +-
 lib/CodeGen/CodeGenFunction.h                      |   43 +-
 lib/CodeGen/CodeGenModule.cpp                      |   28 +-
 lib/CodeGen/CodeGenModule.h                        |   18 +-
 lib/CodeGen/CodeGenPGO.cpp                         |    7 +-
 lib/CodeGen/CodeGenTypes.cpp                       |   12 +-
 lib/CodeGen/CoverageMappingGen.cpp                 |   13 +-
 lib/CodeGen/ItaniumCXXABI.cpp                      |    7 +-
 lib/CodeGen/MicrosoftCXXABI.cpp                    |  376 ++--
 lib/CodeGen/ModuleBuilder.cpp                      |   33 +-
 lib/CodeGen/TargetInfo.cpp                         |  104 +-
 lib/CodeGen/TargetInfo.h                           |    7 -
 lib/Driver/CMakeLists.txt                          |    1 +
 lib/Driver/Compilation.cpp                         |   16 +-
 lib/Driver/CrossWindowsToolChain.cpp               |    5 +-
 lib/Driver/Driver.cpp                              |  462 +++--
 lib/Driver/Job.cpp                                 |   14 +-
 lib/Driver/MSVCToolChain.cpp                       |    4 +-
 lib/Driver/MinGWToolChain.cpp                      |  143 ++
 lib/Driver/SanitizerArgs.cpp                       |   35 +-
 lib/Driver/ToolChains.cpp                          | 1406 +++++++-------
 lib/Driver/ToolChains.h                            |  221 +--
 lib/Driver/Tools.cpp                               | 1952 +++++++++++---------
 lib/Driver/Tools.h                                 |  498 ++---
 lib/Format/ContinuationIndenter.cpp                |    5 +-
 lib/Format/Format.cpp                              |  159 +-
 lib/Format/TokenAnnotator.cpp                      |   31 +-
 lib/Format/UnwrappedLineParser.cpp                 |   15 +-
 lib/Frontend/CompilerInvocation.cpp                |   44 +-
 lib/Frontend/FrontendActions.cpp                   |    7 +
 lib/Frontend/InitHeaderSearch.cpp                  |   65 +-
 lib/Frontend/InitPreprocessor.cpp                  |    8 +
 lib/Frontend/MultiplexConsumer.cpp                 |    9 +
 lib/Frontend/Rewrite/InclusionRewriter.cpp         |   92 +-
 lib/Frontend/Rewrite/RewriteModernObjC.cpp         |   32 +-
 lib/Frontend/Rewrite/RewriteObjC.cpp               |   50 +-
 lib/Headers/CMakeLists.txt                         |    3 +
 lib/Headers/Intrin.h                               |  180 +-
 lib/Headers/__wmmintrin_aes.h                      |   14 +-
 lib/Headers/adxintrin.h                            |   12 +-
 lib/Headers/altivec.h                              |  242 ++-
 lib/Headers/ammintrin.h                            |   12 +-
 lib/Headers/avx2intrin.h                           |  286 +--
 lib/Headers/avx512bwintrin.h                       |  883 ++++++++-
 lib/Headers/avx512cdintrin.h                       |  131 ++
 lib/Headers/avx512dqintrin.h                       |   58 +-
 lib/Headers/avx512fintrin.h                        | 1164 +++++++++---
 lib/Headers/avx512vlbwintrin.h                     |  236 +--
 lib/Headers/avx512vldqintrin.h                     |   80 +-
 lib/Headers/avx512vlintrin.h                       |  973 ++++++++--
 lib/Headers/avxintrin.h                            |  308 +--
 lib/Headers/bmi2intrin.h                           |   20 +-
 lib/Headers/bmiintrin.h                            |   34 +-
 lib/Headers/emmintrin.h                            |  432 ++---
 lib/Headers/f16cintrin.h                           |    8 +-
 lib/Headers/fma4intrin.h                           |   68 +-
 lib/Headers/fmaintrin.h                            |   68 +-
 lib/Headers/fxsrintrin.h                           |   55 +
 lib/Headers/immintrin.h                            |    4 +
 lib/Headers/inttypes.h                             |  102 +
 lib/Headers/lzcntintrin.h                          |   14 +-
 lib/Headers/mm3dnow.h                              |   56 +-
 lib/Headers/mmintrin.h                             |  138 +-
 lib/Headers/module.modulemap                       |   25 -
 lib/Headers/pmmintrin.h                            |   28 +-
 lib/Headers/popcntintrin.h                         |    8 +-
 lib/Headers/rdseedintrin.h                         |   10 +-
 lib/Headers/rtmintrin.h                            |    8 +-
 lib/Headers/shaintrin.h                            |   16 +-
 lib/Headers/smmintrin.h                            |   82 +-
 lib/Headers/tbmintrin.h                            |   40 +-
 lib/Headers/tmmintrin.h                            |   64 +-
 lib/Headers/xmmintrin.h                            |  254 +--
 lib/Headers/xopintrin.h                            |  228 +--
 lib/Lex/HeaderSearch.cpp                           |   39 +-
 lib/Lex/ModuleMap.cpp                              |   47 +-
 lib/Lex/PPDirectives.cpp                           |    3 +-
 lib/Lex/PPLexerChange.cpp                          |   17 +-
 lib/Lex/PPMacroExpansion.cpp                       |    6 +-
 lib/Lex/Preprocessor.cpp                           |    4 +
 lib/Parse/ParseCXXInlineMethods.cpp                |   10 +-
 lib/Parse/ParseDecl.cpp                            |   40 +-
 lib/Parse/ParseDeclCXX.cpp                         |  109 +-
 lib/Parse/ParseExpr.cpp                            |   30 +-
 lib/Parse/ParseExprCXX.cpp                         |    2 +-
 lib/Parse/ParseObjc.cpp                            |    6 +-
 lib/Parse/ParseOpenMP.cpp                          |   80 +-
 lib/Parse/ParseTemplate.cpp                        |   13 +-
 lib/Parse/ParseTentative.cpp                       |   12 +-
 lib/Sema/AnalysisBasedWarnings.cpp                 |    9 +-
 lib/Sema/DeclSpec.cpp                              |   12 +
 lib/Sema/JumpDiagnostics.cpp                       |    3 +-
 lib/Sema/Sema.cpp                                  |    3 +-
 lib/Sema/SemaChecking.cpp                          |   73 +-
 lib/Sema/SemaCodeComplete.cpp                      |    6 +-
 lib/Sema/SemaDecl.cpp                              |  111 +-
 lib/Sema/SemaDeclAttr.cpp                          |   33 +
 lib/Sema/SemaDeclCXX.cpp                           |   31 +-
 lib/Sema/SemaDeclObjC.cpp                          |   40 +-
 lib/Sema/SemaExceptionSpec.cpp                     |   10 +-
 lib/Sema/SemaExpr.cpp                              |   32 +-
 lib/Sema/SemaExprCXX.cpp                           |    6 +
 lib/Sema/SemaExprObjC.cpp                          |  127 +-
 lib/Sema/SemaInit.cpp                              |    6 +-
 lib/Sema/SemaObjCProperty.cpp                      |    4 +-
 lib/Sema/SemaOpenMP.cpp                            |  297 ++-
 lib/Sema/SemaStmt.cpp                              |   13 +-
 lib/Sema/SemaTemplateDeduction.cpp                 |   84 +-
 lib/Sema/SemaTemplateInstantiate.cpp               |   50 +-
 lib/Sema/SemaTemplateInstantiateDecl.cpp           |   24 +-
 lib/Sema/SemaType.cpp                              |   71 +-
 lib/Sema/TreeTransform.h                           |   73 +-
 lib/Serialization/ASTCommon.h                      |    3 +-
 lib/Serialization/ASTReader.cpp                    |   50 +-
 lib/Serialization/ASTReaderDecl.cpp                |   41 +-
 lib/Serialization/ASTReaderStmt.cpp                |   37 +
 lib/Serialization/ASTWriter.cpp                    |   21 +-
 lib/Serialization/ASTWriterStmt.cpp                |   25 +
 lib/Serialization/GeneratePCH.cpp                  |    1 -
 lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp  |    5 +-
 .../Checkers/ArrayBoundCheckerV2.cpp               |    3 +-
 .../Checkers/BasicObjCFoundationChecks.cpp         |   20 +-
 .../Checkers/BoolAssignmentChecker.cpp             |    2 +-
 lib/StaticAnalyzer/Checkers/CStringChecker.cpp     |   32 +-
 .../Checkers/CStringSyntaxChecker.cpp              |    7 +-
 .../Checkers/CallAndMessageChecker.cpp             |   32 +-
 lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp    |    4 +-
 .../Checkers/CastToStructChecker.cpp               |    4 +-
 lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp   |    4 +-
 .../Checkers/CheckSecuritySyntaxOnly.cpp           |   14 +-
 lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp |    6 +-
 lib/StaticAnalyzer/Checkers/ChrootChecker.cpp      |    7 +-
 lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp |   14 +-
 .../Checkers/DirectIvarAssignment.cpp              |    6 +-
 lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp     |    4 +-
 .../Checkers/ExprInspectionChecker.cpp             |   11 +-
 .../Checkers/FixedAddressChecker.cpp               |    4 +-
 .../Checkers/GenericTaintChecker.cpp               |    4 +-
 .../Checkers/IvarInvalidationChecker.cpp           |    6 +-
 .../Checkers/LLVMConventionsChecker.cpp            |    7 +-
 .../Checkers/MacOSKeychainAPIChecker.cpp           |   47 +-
 lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp   |    4 +-
 lib/StaticAnalyzer/Checkers/MallocChecker.cpp      |   57 +-
 .../Checkers/MallocSizeofChecker.cpp               |    7 +-
 .../Checkers/NSAutoreleasePoolChecker.cpp          |    7 +-
 lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp     |    3 +-
 .../Checkers/NonNullParamChecker.cpp               |   33 +-
 lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp  |   12 +-
 .../Checkers/ObjCContainersASTChecker.cpp          |    6 +-
 .../Checkers/ObjCContainersChecker.cpp             |    4 +-
 .../Checkers/ObjCSelfInitChecker.cpp               |    3 +-
 .../Checkers/ObjCUnusedIVarsChecker.cpp            |    4 +-
 .../Checkers/PointerArithChecker.cpp               |    4 +-
 lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp  |    4 +-
 lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp |   39 +-
 lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp |   60 +-
 .../Checkers/ReturnPointerRangeChecker.cpp         |    5 +-
 lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp |    4 +-
 .../Checkers/SimpleStreamChecker.cpp               |    8 +-
 .../Checkers/StackAddrEscapeChecker.cpp            |    8 +-
 lib/StaticAnalyzer/Checkers/StreamChecker.cpp      |   19 +-
 lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp |    4 +-
 .../Checkers/TestAfterDivZeroChecker.cpp           |   10 +-
 lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp |   14 +-
 .../Checkers/UndefCapturedBlockVarChecker.cpp      |   13 +-
 lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp |    4 +-
 .../Checkers/UndefinedArraySubscriptChecker.cpp    |    4 +-
 .../Checkers/UndefinedAssignmentChecker.cpp        |    4 +-
 lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp     |   12 +-
 lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp     |    4 +-
 lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp |    6 +-
 lib/StaticAnalyzer/Core/BasicValueFactory.cpp      |    4 +
 lib/StaticAnalyzer/Core/BugReporter.cpp            |   19 +-
 lib/StaticAnalyzer/Core/BugReporterVisitors.cpp    |    5 +-
 lib/StaticAnalyzer/Core/CheckerHelpers.cpp         |   32 +-
 lib/StaticAnalyzer/Core/ExprEngine.cpp             |    2 +
 lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp   |    5 +-
 lib/Tooling/CompilationDatabase.cpp                |   13 +-
 test/Analysis/division-by-zero.c                   |    7 +
 test/Analysis/retain-release.m                     |    2 +-
 test/Analysis/test-include-cpp.cpp                 |   13 +
 test/Analysis/test-include-cpp.h                   |    9 +
 test/Analysis/test-include.c                       |   21 +
 test/Analysis/test-include.h                       |    2 +
 .../dcl.spec/dcl.type/dcl.spec.auto/p7-1y.cpp      |    4 +
 test/CXX/drs/dr9xx.cpp                             |   29 +
 test/CodeCompletion/macros-in-modules.c            |   11 +
 test/CodeCompletion/macros-in-modules.m            |   10 +
 test/CodeGen/2004-11-27-StaticFunctionRedeclare.c  |    2 +-
 test/CodeGen/2007-04-14-FNoBuiltin.c               |    2 +-
 test/CodeGen/PR8880.c                              |   12 +-
 test/CodeGen/arm-microsoft-intrinsics.c            |   12 +-
 test/CodeGen/arm-target-features.c                 |    6 +-
 test/CodeGen/attr-nodebug.c                        |   26 +-
 test/CodeGen/attr-target.c                         |    7 +-
 test/CodeGen/avx512bw-builtins.c                   |  406 ++++
 test/CodeGen/avx512cdintrin.c                      |   62 +
 test/CodeGen/avx512f-builtins.c                    |  483 ++++-
 test/CodeGen/avx512vl-builtins.c                   |  436 +++++
 test/CodeGen/builtin-cpu-supports.c                |   16 +
 test/CodeGen/builtins-nvptx.c                      |  122 +-
 test/CodeGen/builtins-ppc-p8vector.c               |   93 +-
 test/CodeGen/builtins-ppc-vsx.c                    |   85 +
 test/CodeGen/builtins-x86.c                        |    4 +
 test/CodeGen/builtinshufflevector2.c               |    6 +-
 test/CodeGen/c-strings.c                           |   22 +-
 test/CodeGen/c11atomics.c                          |   20 +-
 test/CodeGen/call.c                                |    2 +-
 test/CodeGen/captured-statements-nested.c          |    4 +-
 test/CodeGen/captured-statements.c                 |    6 +-
 test/CodeGen/complex-convert.c                     |    2 +-
 test/CodeGen/debug-info-packed-struct.c            |   91 +
 test/CodeGen/dostmt.c                              |    2 +-
 test/CodeGen/fast-math.c                           |    2 +-
 test/CodeGen/finite-math.c                         |    2 +-
 test/CodeGen/linkage-redecl.c                      |    2 +-
 test/CodeGen/nomathbuiltin.c                       |    2 +-
 test/CodeGen/openmp_default_simd_align.c           |   11 +
 test/CodeGen/pr9614.c                              |   32 +-
 test/CodeGen/redefine_extname.c                    |   11 +
 test/CodeGen/stack-protector.c                     |   10 +-
 test/CodeGen/static-order.c                        |    2 +-
 test/CodeGen/ubsan-blacklist.c                     |   12 +-
 test/CodeGen/volatile-1.c                          |    6 +-
 test/CodeGen/x86_64-arguments.c                    |   80 +-
 test/CodeGenCXX/PR5093-static-member-function.cpp  |    2 +-
 test/CodeGenCXX/address-of-fntemplate.cpp          |    4 +-
 test/CodeGenCXX/attr-cleanup.cpp                   |    2 +-
 test/CodeGenCXX/block-byref-cxx-objc.cpp           |    4 +-
 test/CodeGenCXX/c-linkage.cpp                      |    4 +-
 test/CodeGenCXX/captured-statements.cpp            |   16 +-
 test/CodeGenCXX/constructor-attr.cpp               |    2 +-
 test/CodeGenCXX/ctor-globalopt.cpp                 |    4 +-
 test/CodeGenCXX/debug-info-line.cpp                |    2 +-
 test/CodeGenCXX/debug-info-method-nodebug.cpp      |   12 +
 test/CodeGenCXX/debug-info-namespace.cpp           |    4 +-
 test/CodeGenCXX/deferred-global-init.cpp           |    6 +-
 .../derived-to-virtual-base-class-calls-final.cpp  |    2 +-
 test/CodeGenCXX/destructor-crash.cpp               |   19 +
 test/CodeGenCXX/dllexport.cpp                      |   12 +-
 test/CodeGenCXX/dynamic_cast-no-rtti.cpp           |    8 +-
 test/CodeGenCXX/extern-c.cpp                       |    2 +-
 .../function-template-explicit-specialization.cpp  |    4 +-
 test/CodeGenCXX/globalinit-loc.cpp                 |    2 +-
 test/CodeGenCXX/inline-dllexport-member.cpp        |    2 +-
 test/CodeGenCXX/linetable-virtual-variadic.cpp     |    2 +-
 test/CodeGenCXX/mangle-address-space.cpp           |    6 +-
 test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp  |   15 +
 test/CodeGenCXX/mangle-ms-templates-memptrs.cpp    |   16 +-
 test/CodeGenCXX/mangle-nullptr-arg.cpp             |    6 +-
 test/CodeGenCXX/mangle-template.cpp                |   10 +-
 test/CodeGenCXX/microsoft-abi-array-cookies.cpp    |   10 +
 test/CodeGenCXX/microsoft-abi-member-pointers.cpp  |   23 +-
 test/CodeGenCXX/microsoft-abi-thunks.cpp           |    4 +-
 test/CodeGenCXX/pr11797.cpp                        |    2 +-
 test/CodeGenCXX/pr18661.cpp                        |    2 +-
 test/CodeGenCXX/pr9965.cpp                         |    2 +-
 test/CodeGenCXX/pragma-weak.cpp                    |    8 +-
 test/CodeGenCXX/predefined-expr.cpp                |    4 +-
 test/CodeGenCXX/redefine_extname.cpp               |   14 +-
 .../template-dependent-bind-temporary.cpp          |    2 +-
 test/CodeGenCXX/thunks.cpp                         |    1 +
 test/CodeGenCXX/trap-fnattr.cpp                    |   36 +
 test/CodeGenCXX/typeid-should-throw.cpp            |   30 +-
 test/CodeGenCXX/vararg-non-pod.cpp                 |    2 +-
 test/CodeGenCXX/virtual-destructor-synthesis.cpp   |    2 +-
 test/CodeGenCXX/vla-lambda-capturing.cpp           |   26 +-
 test/CodeGenCXX/volatile-1.cpp                     |    2 +-
 test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp      |   17 +-
 test/CodeGenObjC/arc-block-copy-escape.m           |    8 +-
 test/CodeGenObjC/objc2-legacy-dispatch.m           |    4 +-
 test/CodeGenObjC/related-result-type.m             |    8 +-
 test/CodeGenObjCXX/arc-mangle.mm                   |   18 +-
 test/CodeGenObjCXX/debug-info-line.mm              |    2 +-
 test/CodeGenOpenCL/event_t.cl                      |    4 +-
 test/CodeGenOpenCL/local.cl                        |    2 +-
 test/CodeGenOpenCL/opencl_types.cl                 |    6 +-
 test/CoverageMapping/implicit-def-in-macro.m       |   16 +
 test/CoverageMapping/ir.c                          |    2 +-
 .../lib/powerpc64le-linux-gnu/.keep                |    0
 .../lib/gcc/powerpc64le-linux-gnu/4.5/crtbegin.o   |    0
 .../usr/lib/powerpc64le-linux-gnu/.keep            |    0
 test/Driver/arm-mfpu.c                             |   55 +
 test/Driver/bindings.c                             |    8 +-
 test/Driver/cl-link.c                              |    8 +-
 test/Driver/cl-x86-flags.c                         |   11 +-
 test/Driver/darwin-debug-flags.c                   |    2 +-
 test/Driver/darwin-dsymutil.c                      |    2 +-
 test/Driver/darwin-sdkroot.c                       |   36 +
 test/Driver/darwin-verify-debug.c                  |    2 +-
 test/Driver/fsanitize.c                            |   24 +-
 test/Driver/integrated-as.s                        |    3 +
 test/Driver/ios-version-min.c                      |    7 +
 test/Driver/linux-ld.c                             |   13 +
 test/Driver/lit.local.cfg                          |   12 +
 test/Driver/msan.c                                 |   12 +
 test/Driver/pic.c                                  |    6 +
 test/FixIt/fixit-nullability-declspec.cpp          |    6 +-
 test/Index/annotate-literals.m                     |   81 +-
 test/Index/complete-objc-message.m                 |    2 +-
 test/Index/complete-stmt.c                         |    4 +-
 test/Modules/Inputs/DebugModule.h                  |    1 +
 test/Modules/Inputs/ImportNameInDir.h              |    4 +
 .../Inputs/NameInDir.framework/Headers/NameInDir.h |    2 +
 .../NameInDir.framework/Modules/module.modulemap   |    5 +
 .../NameInDir2.framework/Headers/NameInDir2.h      |    1 +
 .../NameInDir2.framework/Modules/module.modulemap  |    5 +
 .../Headers/NameInDirInferred.h                    |    1 +
 test/Modules/Inputs/crash.h                        |    1 +
 test/Modules/Inputs/explicit-build-prefer-self/a.h |    2 +
 test/Modules/Inputs/explicit-build-prefer-self/b.h |    2 +
 test/Modules/Inputs/explicit-build-prefer-self/map |    2 +
 test/Modules/Inputs/explicit-build-prefer-self/x.h |    0
 .../Inputs/merge-class-definition-visibility/b.h   |    2 +
 .../Inputs/merge-class-definition-visibility/d.h   |    2 +-
 .../Inputs/merge-class-definition-visibility/e.h   |    3 +
 .../merge-class-definition-visibility/modmap       |    3 +-
 test/Modules/Inputs/module.map                     |   12 +
 test/Modules/Inputs/submodule-visibility/a.h       |    8 +
 test/Modules/Inputs/submodule-visibility/b.h       |    9 +
 test/Modules/Inputs/submodule-visibility/c.h       |    6 +
 .../Inputs/submodule-visibility/module.modulemap   |    1 +
 test/Modules/Inputs/submodule-visibility/other.h   |    1 +
 test/Modules/Inputs/submodules-merge-defs/defs.h   |   22 +-
 test/Modules/cxx-irgen.cpp                         |   12 +-
 test/Modules/debug-info-moduleimport.m             |    7 +
 test/Modules/direct-module-import.m                |    2 +-
 test/Modules/explicit-build-prefer-self.cpp        |    3 +
 test/Modules/framework-name.m                      |   33 +
 test/Modules/merge-class-definition-visibility.cpp |   14 +-
 test/Modules/module-feature.m                      |   14 +
 test/Modules/module_file_info.m                    |    4 +-
 test/Modules/modules-with-same-name.m              |    6 +-
 test/Modules/pch-used.m                            |    2 +-
 test/Modules/signal.m                              |   11 +
 test/Modules/submodule-visibility.cpp              |    8 +
 test/Modules/submodules-merge-defs.cpp             |   36 +-
 test/OpenMP/barrier_codegen.cpp                    |    6 +-
 test/OpenMP/cancel_ast_print.cpp                   |   47 +
 test/OpenMP/cancel_messages.cpp                    |   83 +
 test/OpenMP/cancellation_point_ast_print.cpp       |   47 +
 test/OpenMP/cancellation_point_codegen.cpp         |   95 +
 test/OpenMP/cancellation_point_messages.cpp        |   83 +
 test/OpenMP/critical_codegen.cpp                   |   14 +-
 test/OpenMP/flush_codegen.cpp                      |    8 +-
 test/OpenMP/for_codegen.cpp                        |   14 +-
 test/OpenMP/for_firstprivate_codegen.cpp           |   10 +-
 test/OpenMP/for_private_codegen.cpp                |   25 +-
 test/OpenMP/for_simd_codegen.cpp                   |   16 +-
 test/OpenMP/master_codegen.cpp                     |   14 +-
 test/OpenMP/ordered_codegen.cpp                    |    8 +-
 test/OpenMP/parallel_codegen.cpp                   |   28 +-
 test/OpenMP/parallel_copyin_codegen.cpp            |   68 +-
 test/OpenMP/parallel_firstprivate_codegen.cpp      |   28 +-
 test/OpenMP/parallel_for_codegen.cpp               |   20 +-
 test/OpenMP/parallel_for_simd_codegen.cpp          |   20 +-
 test/OpenMP/parallel_if_codegen.cpp                |   66 +-
 test/OpenMP/parallel_num_threads_codegen.cpp       |   30 +-
 test/OpenMP/parallel_private_codegen.cpp           |   26 +-
 test/OpenMP/parallel_proc_bind_codegen.cpp         |   16 +-
 test/OpenMP/parallel_sections_codegen.cpp          |    4 +-
 test/OpenMP/sections_codegen.cpp                   |    6 +-
 test/OpenMP/sections_firstprivate_codegen.cpp      |    8 +-
 test/OpenMP/sections_private_codegen.cpp           |   24 +-
 test/OpenMP/simd_metadata.c                        |    3 +
 test/OpenMP/single_codegen.cpp                     |    2 +-
 test/OpenMP/single_firstprivate_codegen.cpp        |    2 +-
 test/OpenMP/single_private_codegen.cpp             |   24 +-
 test/OpenMP/task_ast_print.cpp                     |   16 +-
 test/OpenMP/task_codegen.cpp                       |   97 +-
 test/OpenMP/task_depend_messages.cpp               |   39 +
 test/OpenMP/task_if_codegen.cpp                    |  113 +-
 test/OpenMP/task_private_codegen.cpp               |   45 +-
 test/OpenMP/taskgroup_codegen.cpp                  |   18 +-
 test/PCH/cxx-mangling.cpp                          |    6 +-
 test/PCH/objc_container.m                          |    2 +-
 test/PCH/objc_literals.m                           |    2 +-
 test/PCH/objc_literals.mm                          |    2 +-
 test/PCH/subscripting-literals.m                   |   13 +
 test/Parser/cxx-class.cpp                          |   10 +
 test/Parser/cxx-concept-declaration.cpp            |   30 +
 test/Parser/cxx-concepts-ambig-constraint-expr.cpp |   29 +
 test/Parser/cxx-concepts-requires-clause.cpp       |   82 +
 test/Parser/nullability.c                          |    8 +-
 test/Preprocessor/cxx_true.cpp                     |   13 +-
 test/Preprocessor/predefined-nullability.c         |   12 +
 test/Profile/cxx-lambda.cpp                        |    8 +-
 test/Profile/cxx-rangefor.cpp                      |    2 +-
 test/Profile/cxx-templates.cpp                     |    8 +-
 test/Profile/profile-does-not-exist.c              |    2 +-
 test/Sema/arm-microsoft-intrinsics.c               |    9 +
 test/Sema/attr-malloc.c                            |    4 +-
 test/Sema/builtin-cpu-supports.c                   |   21 +
 test/Sema/enable_if.c                              |    2 +-
 test/Sema/non-null-warning.c                       |   16 +-
 test/Sema/nullability.c                            |   96 +-
 test/Sema/typo-correction.c                        |    9 +
 .../cxx0x-initializer-stdinitializerlist.cpp       |    7 +
 test/SemaCXX/cxx1y-generic-lambdas.cpp             |   15 +
 test/SemaCXX/nullability-declspec.cpp              |   10 +-
 test/SemaCXX/nullability.cpp                       |   47 +-
 test/SemaCXX/openmp_default_simd_align.cpp         |   83 +
 test/SemaCXX/typo-correction-cxx11.cpp             |   26 +
 test/SemaCXX/virtual-function-in-union.cpp         |    5 +
 test/SemaCXX/virtuals.cpp                          |   10 +
 test/SemaObjC/arc-unavailable-for-weakref.m        |    6 +-
 test/SemaObjC/format-strings-objc.m                |   13 +
 test/SemaObjC/nullability-arc.m                    |    2 +-
 test/SemaObjC/nullability.m                        |  120 +-
 test/SemaObjC/nullable-weak-property.m             |    4 +-
 test/SemaObjCXX/Inputs/nullability-consistency-1.h |    2 +-
 test/SemaObjCXX/Inputs/nullability-consistency-2.h |    2 +-
 test/SemaObjCXX/Inputs/nullability-consistency-3.h |    2 +-
 test/SemaObjCXX/Inputs/nullability-consistency-4.h |    2 +-
 test/SemaObjCXX/Inputs/nullability-consistency-5.h |    2 +-
 test/SemaObjCXX/Inputs/nullability-consistency-8.h |   18 +-
 .../nullability-consistency-system.h               |    2 +-
 test/SemaObjCXX/Inputs/nullability-pragmas-1.h     |   24 +-
 test/SemaObjCXX/nullability-consistency.mm         |    2 +-
 test/SemaObjCXX/nullability-pragmas.mm             |   28 +-
 .../instantiate-explicitly-after-fatal.cpp         |    9 +
 test/SemaTemplate/instantiate-local-class.cpp      |   54 +
 test/lit.cfg                                       |    7 +-
 tools/clang-fuzzer/ClangFuzzer.cpp                 |    5 +-
 tools/driver/cc1as_main.cpp                        |   75 +-
 tools/driver/driver.cpp                            |    8 +-
 tools/libclang/CIndex.cpp                          |   28 +-
 tools/libclang/CXCursor.cpp                        |    6 +
 tools/scan-build/ccc-analyzer                      |   75 +-
 tools/scan-build/scan-build                        |   35 +-
 unittests/ASTMatchers/ASTMatchersTest.cpp          |    8 +
 unittests/CodeGen/BufferSourceTest.cpp             |    2 +
 unittests/Format/FormatTest.cpp                    |   92 +-
 unittests/Format/FormatTestJS.cpp                  |   30 +-
 unittests/Format/FormatTestJava.cpp                |    2 +
 unittests/Format/FormatTestProto.cpp               |    4 +
 utils/analyzer/CmpRuns.py                          |    2 +-
 utils/analyzer/SATestBuild.py                      |   61 +-
 570 files changed, 17358 insertions(+), 9586 deletions(-)
 create mode 100644 docs/CommandGuide/clang.rst
 create mode 100644 docs/CommandGuide/index.rst
 delete mode 100644 docs/tools/Makefile
 delete mode 100644 docs/tools/clang.pod
 delete mode 100644 docs/tools/manpage.css
 create mode 100644 lib/Driver/MinGWToolChain.cpp
 create mode 100644 lib/Headers/avx512cdintrin.h
 create mode 100644 lib/Headers/fxsrintrin.h
 create mode 100644 lib/Headers/inttypes.h
 create mode 100644 test/Analysis/division-by-zero.c
 create mode 100644 test/Analysis/test-include-cpp.cpp
 create mode 100644 test/Analysis/test-include-cpp.h
 create mode 100644 test/Analysis/test-include.c
 create mode 100644 test/Analysis/test-include.h
 create mode 100644 test/CodeCompletion/macros-in-modules.c
 create mode 100644 test/CodeCompletion/macros-in-modules.m
 create mode 100644 test/CodeGen/avx512cdintrin.c
 create mode 100644 test/CodeGen/builtin-cpu-supports.c
 create mode 100644 test/CodeGen/debug-info-packed-struct.c
 create mode 100644 test/CodeGen/openmp_default_simd_align.c
 create mode 100644 test/CodeGenCXX/debug-info-method-nodebug.cpp
 create mode 100644 test/CodeGenCXX/destructor-crash.cpp
 create mode 100644 test/CodeGenCXX/trap-fnattr.cpp
 create mode 100644 test/CoverageMapping/implicit-def-in-macro.m
 create mode 100644 test/Driver/Inputs/debian_multiarch_tree/lib/powerpc64le-linux-gnu/.keep
 create mode 100644 test/Driver/Inputs/debian_multiarch_tree/usr/lib/gcc/powerpc64le-linux-gnu/4.5/crtbegin.o
 create mode 100644 test/Driver/Inputs/debian_multiarch_tree/usr/lib/powerpc64le-linux-gnu/.keep
 create mode 100644 test/Driver/ios-version-min.c
 create mode 100644 test/Driver/msan.c
 create mode 100644 test/Modules/Inputs/DebugModule.h
 create mode 100644 test/Modules/Inputs/ImportNameInDir.h
 create mode 100644 test/Modules/Inputs/NameInDir.framework/Headers/NameInDir.h
 create mode 100644 test/Modules/Inputs/NameInDir.framework/Modules/module.modulemap
 create mode 100644 test/Modules/Inputs/NameInDir2.framework/Headers/NameInDir2.h
 create mode 100644 test/Modules/Inputs/NameInDir2.framework/Modules/module.modulemap
 create mode 100644 test/Modules/Inputs/NameInDirInferred.framework/Headers/NameInDirInferred.h
 create mode 100644 test/Modules/Inputs/crash.h
 create mode 100644 test/Modules/Inputs/explicit-build-prefer-self/a.h
 create mode 100644 test/Modules/Inputs/explicit-build-prefer-self/b.h
 create mode 100644 test/Modules/Inputs/explicit-build-prefer-self/map
 create mode 100644 test/Modules/Inputs/explicit-build-prefer-self/x.h
 create mode 100644 test/Modules/Inputs/merge-class-definition-visibility/e.h
 create mode 100644 test/Modules/Inputs/submodule-visibility/c.h
 create mode 100644 test/Modules/Inputs/submodule-visibility/other.h
 create mode 100644 test/Modules/debug-info-moduleimport.m
 create mode 100644 test/Modules/explicit-build-prefer-self.cpp
 create mode 100644 test/Modules/framework-name.m
 create mode 100644 test/Modules/module-feature.m
 create mode 100644 test/Modules/signal.m
 create mode 100644 test/OpenMP/cancel_ast_print.cpp
 create mode 100644 test/OpenMP/cancel_messages.cpp
 create mode 100644 test/OpenMP/cancellation_point_ast_print.cpp
 create mode 100644 test/OpenMP/cancellation_point_codegen.cpp
 create mode 100644 test/OpenMP/cancellation_point_messages.cpp
 create mode 100644 test/OpenMP/task_depend_messages.cpp
 create mode 100644 test/Parser/cxx-concept-declaration.cpp
 create mode 100644 test/Parser/cxx-concepts-ambig-constraint-expr.cpp
 create mode 100644 test/Parser/cxx-concepts-requires-clause.cpp
 create mode 100644 test/Preprocessor/predefined-nullability.c
 create mode 100644 test/Sema/arm-microsoft-intrinsics.c
 create mode 100644 test/Sema/builtin-cpu-supports.c
 create mode 100644 test/SemaCXX/openmp_default_simd_align.cpp
 create mode 100644 test/SemaCXX/virtual-function-in-union.cpp
 create mode 100644 test/SemaTemplate/instantiate-explicitly-after-fatal.cpp

diff --git a/docs/AddressSanitizer.rst b/docs/AddressSanitizer.rst
index 6175433..66ed344 100644
--- a/docs/AddressSanitizer.rst
+++ b/docs/AddressSanitizer.rst
@@ -60,7 +60,28 @@ or:
     % clang -g -fsanitize=address example_UseAfterFree.o
 
 If a bug is detected, the program will print an error message to stderr and
-exit with a non-zero exit code. To make AddressSanitizer symbolize its output
+exit with a non-zero exit code. AddressSanitizer exits on the first detected error.
+This is by design:
+
+* This approach allows AddressSanitizer to produce faster and smaller generated code
+  (both by ~5%).
+* Fixing bugs becomes unavoidable. AddressSanitizer does not produce
+  false alarms. Once a memory corruption occurs, the program is in an inconsistent
+  state, which could lead to confusing results and potentially misleading
+  subsequent reports.
+
+If your process is sandboxed and you are running on OS X 10.10 or earlier, you
+will need to set ``DYLD_INSERT_LIBRARIES`` environment variable and point it to
+the ASan library that is packaged with the compiler used to build the
+executable. (You can find the library by searching for dynamic libraries with
+``asan`` in their name.) If the environment variable is not set, the process will
+try to re-exec. Also keep in mind that when moving the executable to another machine,
+the ASan library will also need to be copied over.
+
+Symbolizing the Reports
+=========================
+
+To make AddressSanitizer symbolize its output
 you need to set the ``ASAN_SYMBOLIZER_PATH`` environment variable to point to
 the ``llvm-symbolizer`` binary (or make sure ``llvm-symbolizer`` is in your
 ``$PATH``):
@@ -100,14 +121,63 @@ force disabled by setting ``ASAN_OPTIONS=symbolize=0``):
 Note that on OS X you may need to run ``dsymutil`` on your binary to have the
 file\:line info in the AddressSanitizer reports.
 
-AddressSanitizer exits on the first detected error. This is by design.
-One reason: it makes the generated code smaller and faster (both by
-~5%). Another reason: this makes fixing bugs unavoidable. With Valgrind,
-it is often the case that users treat Valgrind warnings as false
-positives (which they are not) and don't fix them.
+Additional Checks
+=================
 
-``__has_feature(address_sanitizer)``
-------------------------------------
+Initialization order checking
+-----------------------------
+
+AddressSanitizer can optionally detect dynamic initialization order problems,
+when initialization of globals defined in one translation unit uses
+globals defined in another translation unit. To enable this check at runtime,
+you should set environment variable
+``ASAN_OPTIONS=check_initialization_order=1``.
+
+Note that this option is not supported on OS X.
+
+Memory leak detection
+---------------------
+
+For more information on leak detector in AddressSanitizer, see
+:doc:`LeakSanitizer`. The leak detection is turned on by default on Linux;
+however, it is not yet supported on other platforms.
+
+Issue Suppression
+=================
+
+AddressSanitizer is not expected to produce false positives. If you see one,
+look again; most likely it is a true positive!
+
+Suppressing Reports in External Libraries
+-----------------------------------------
+Runtime interposition allows AddressSanitizer to find bugs in code that is
+not being recompiled. If you run into an issue in external libraries, we
+recommend immediately reporting it to the library maintainer so that it
+gets addressed. However, you can use the following suppression mechanism
+to unblock yourself and continue on with the testing. This suppression
+mechanism should only be used for suppressing issues in external code; it
+does not work on code recompiled with AddressSanitizer. To suppress errors
+in external libraries, set the ``ASAN_OPTIONS`` environment variable to point
+to a suppression file. You can either specify the full path to the file or the
+path of the file relative to the location of your executable.
+
+.. code-block:: bash
+
+    ASAN_OPTIONS=suppressions=MyASan.supp
+
+Use the following format to specify the names of the functions or libraries
+you want to suppress. You can see these in the error report. Remember that
+the narrower the scope of the suppression, the more bugs you will be able to
+catch.
+
+.. code-block:: bash
+
+    interceptor_via_fun:NameOfCFunctionToSuppress
+    interceptor_via_fun:-[ClassName objCMethodToSuppress:]
+    interceptor_via_lib:NameOfTheLibraryToSuppress
+
+Conditional Compilation with ``__has_feature(address_sanitizer)``
+-----------------------------------------------------------------
 
 In some cases one may need to execute different code depending on whether
 AddressSanitizer is enabled.
@@ -122,28 +192,19 @@ this purpose.
     #  endif
     #endif
 
-``__attribute__((no_sanitize_address))``
------------------------------------------------
+Disabling Instrumentation with ``__attribute__((no_sanitize("address")))``
+--------------------------------------------------------------------------
 
 Some code should not be instrumented by AddressSanitizer. One may use the
-function attribute
-:ref:`no_sanitize_address <langext-address_sanitizer>`
-(or a deprecated synonym `no_address_safety_analysis`)
-to disable instrumentation of a particular function. This attribute may not be
-supported by other compilers, so we suggest to use it together with
-``__has_feature(address_sanitizer)``.
-
-Initialization order checking
------------------------------
-
-AddressSanitizer can optionally detect dynamic initialization order problems,
-when initialization of globals defined in one translation unit uses
-globals defined in another translation unit. To enable this check at runtime,
-you should set environment variable
-``ASAN_OPTIONS=check_initialization_order=1``.
+function attribute ``__attribute__((no_sanitize("address")))``
+(which has deprecated synonyms
+:ref:`no_sanitize_address <langext-address_sanitizer>` and
+`no_address_safety_analysis`) to disable instrumentation of a particular
+function. This attribute may not be supported by other compilers, so we suggest
+to use it together with ``__has_feature(address_sanitizer)``.
 
-Blacklist
----------
+Suppressing Errors in Recompiled Code (Blacklist)
+-------------------------------------------------
 
 AddressSanitizer supports ``src`` and ``fun`` entity types in
 :doc:`SanitizerSpecialCaseList`, that can be used to suppress error reports
@@ -172,24 +233,6 @@ problems happening in certain source files or with certain global variables.
     type:*BadInitClassSubstring*=init
     src:bad/init/files/*=init
 
-Memory leak detection
----------------------
-
-For the experimental memory leak detector in AddressSanitizer, see
-:doc:`LeakSanitizer`.
-
-Supported Platforms
-===================
-
-AddressSanitizer is supported on
-
-* Linux i386/x86\_64 (tested on Ubuntu 12.04);
-* MacOS 10.6 - 10.9 (i386/x86\_64).
-* Android ARM
-* FreeBSD i386/x86\_64 (tested on FreeBSD 11-current)
-
-Ports to various other platforms are in progress.
-
 Limitations
 ===========
 
@@ -202,6 +245,19 @@ Limitations
   usually expected.
 * Static linking is not supported.
 
+Supported Platforms
+===================
+
+AddressSanitizer is supported on:
+
+* Linux i386/x86\_64 (tested on Ubuntu 12.04)
+* OS X 10.7 - 10.11 (i386/x86\_64)
+* iOS Simulator
+* Android ARM
+* FreeBSD i386/x86\_64 (tested on FreeBSD 11-current)
+
+Ports to various other platforms are in progress.
+
 Current Status
 ==============
 
diff --git a/docs/AttributeReference.rst b/docs/AttributeReference.rst
index 115a217..a763dde 100644
--- a/docs/AttributeReference.rst
+++ b/docs/AttributeReference.rst
@@ -1,1116 +1,13 @@
 ..
   -------------------------------------------------------------------
   NOTE: This file is automatically generated by running clang-tblgen
-  -gen-attr-docs. Do not edit this file by hand!!
+  -gen-attr-docs. Do not edit this file by hand!! The contents for
+  this file are automatically generated by a server-side process.
+  
+  Please do not commit this file. The file exists for local testing
+  purposes only.
   -------------------------------------------------------------------
 
 ===================
 Attributes in Clang
-===================
-.. contents::
-   :local:
-
-Introduction
-============
-
-This page lists the attributes currently supported by Clang.
-
-Function Attributes
-===================
-
-
-interrupt
----------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Clang supports the GNU style ``__attribute__((interrupt("TYPE")))`` attribute on
-ARM targets. This attribute may be attached to a function definition and
-instructs the backend to generate appropriate function entry/exit code so that
-it can be used directly as an interrupt service routine.
-
-The parameter passed to the interrupt attribute is optional, but if
-provided it must be a string literal with one of the following values: "IRQ",
-"FIQ", "SWI", "ABORT", "UNDEF".
-
-The semantics are as follows:
-
-- If the function is AAPCS, Clang instructs the backend to realign the stack to
-  8 bytes on entry. This is a general requirement of the AAPCS at public
-  interfaces, but may not hold when an exception is taken. Doing this allows
-  other AAPCS functions to be called.
-- If the CPU is M-class this is all that needs to be done since the architecture
-  itself is designed in such a way that functions obeying the normal AAPCS ABI
-  constraints are valid exception handlers.
-- If the CPU is not M-class, the prologue and epilogue are modified to save all
-  non-banked registers that are used, so that upon return the user-mode state
-  will not be corrupted. Note that to avoid unnecessary overhead, only
-  general-purpose (integer) registers are saved in this way. If VFP operations
-  are needed, that state must be saved manually.
-
-  Specifically, interrupt kinds other than "FIQ" will save all core registers
-  except "lr" and "sp". "FIQ" interrupts will save r0-r7.
-- If the CPU is not M-class, the return instruction is changed to one of the
-  canonical sequences permitted by the architecture for exception return. Where
-  possible the function itself will make the necessary "lr" adjustments so that
-  the "preferred return address" is selected.
-
-  Unfortunately the compiler is unable to make this guarantee for an "UNDEF"
-  handler, where the offset from "lr" to the preferred return address depends on
-  the execution state of the code which generated the exception. In this case
-  a sequence equivalent to "movs pc, lr" will be used.
-
-
-acquire_capability (acquire_shared_capability, clang::acquire_capability, clang::acquire_shared_capability)
------------------------------------------------------------------------------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-Marks a function as acquiring a capability.
-
-
-assert_capability (assert_shared_capability, clang::assert_capability, clang::assert_shared_capability)
--------------------------------------------------------------------------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-Marks a function that dynamically tests whether a capability is held, and halts
-the program if it is not held.
-
-
-availability
-------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-The ``availability`` attribute can be placed on declarations to describe the
-lifecycle of that declaration relative to operating system versions.  Consider
-the function declaration for a hypothetical function ``f``:
-
-.. code-block:: c++
-
-  void f(void) __attribute__((availability(macosx,introduced=10.4,deprecated=10.6,obsoleted=10.7)));
-
-The availability attribute states that ``f`` was introduced in Mac OS X 10.4,
-deprecated in Mac OS X 10.6, and obsoleted in Mac OS X 10.7.  This information
-is used by Clang to determine when it is safe to use ``f``: for example, if
-Clang is instructed to compile code for Mac OS X 10.5, a call to ``f()``
-succeeds.  If Clang is instructed to compile code for Mac OS X 10.6, the call
-succeeds but Clang emits a warning specifying that the function is deprecated.
-Finally, if Clang is instructed to compile code for Mac OS X 10.7, the call
-fails because ``f()`` is no longer available.
-
-The availability attribute is a comma-separated list starting with the
-platform name and then including clauses specifying important milestones in the
-declaration's lifetime (in any order) along with additional information.  Those
-clauses can be:
-
-introduced=\ *version*
-  The first version in which this declaration was introduced.
-
-deprecated=\ *version*
-  The first version in which this declaration was deprecated, meaning that
-  users should migrate away from this API.
-
-obsoleted=\ *version*
-  The first version in which this declaration was obsoleted, meaning that it
-  was removed completely and can no longer be used.
-
-unavailable
-  This declaration is never available on this platform.
-
-message=\ *string-literal*
-  Additional message text that Clang will provide when emitting a warning or
-  error about use of a deprecated or obsoleted declaration.  Useful to direct
-  users to replacement APIs.
-
-Multiple availability attributes can be placed on a declaration, which may
-correspond to different platforms.  Only the availability attribute with the
-platform corresponding to the target platform will be used; any others will be
-ignored.  If no availability attribute specifies availability for the current
-target platform, the availability attributes are ignored.  Supported platforms
-are:
-
-``ios``
-  Apple's iOS operating system.  The minimum deployment target is specified by
-  the ``-mios-version-min=*version*`` or ``-miphoneos-version-min=*version*``
-  command-line arguments.
-
-``macosx``
-  Apple's Mac OS X operating system.  The minimum deployment target is
-  specified by the ``-mmacosx-version-min=*version*`` command-line argument.
-
-A declaration can be used even when deploying back to a platform version prior
-to when the declaration was introduced.  When this happens, the declaration is
-`weakly linked
-<https://developer.apple.com/library/mac/#documentation/MacOSX/Conceptual/BPFrameworks/Concepts/WeakLinking.html>`_,
-as if the ``weak_import`` attribute were added to the declaration.  A
-weakly-linked declaration may or may not be present a run-time, and a program
-can determine whether the declaration is present by checking whether the
-address of that declaration is non-NULL.
-
-If there are multiple declarations of the same entity, the availability
-attributes must either match on a per-platform basis or later
-declarations must not have availability attributes for that
-platform. For example:
-
-.. code-block:: c
-
-  void g(void) __attribute__((availability(macosx,introduced=10.4)));
-  void g(void) __attribute__((availability(macosx,introduced=10.4))); // okay, matches
-  void g(void) __attribute__((availability(ios,introduced=4.0))); // okay, adds a new platform
-  void g(void); // okay, inherits both macosx and ios availability from above.
-  void g(void) __attribute__((availability(macosx,introduced=10.5))); // error: mismatch
-
-When one method overrides another, the overriding method can be more widely available than the overridden method, e.g.,:
-
-.. code-block:: objc
-
-  @interface A
-  - (id)method __attribute__((availability(macosx,introduced=10.4)));
-  - (id)method2 __attribute__((availability(macosx,introduced=10.4)));
-  @end
-
-  @interface B : A
-  - (id)method __attribute__((availability(macosx,introduced=10.3))); // okay: method moved into base class later
-  - (id)method __attribute__((availability(macosx,introduced=10.5))); // error: this method was available via the base class in 10.4
-  @end
-
-
-_Noreturn
----------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "","","","X"
-
-A function declared as ``_Noreturn`` shall not return to its caller. The
-compiler will generate a diagnostic for a function declared as ``_Noreturn``
-that appears to be capable of returning to its caller.
-
-
-noreturn
---------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "","X","",""
-
-A function declared as ``[[noreturn]]`` shall not return to its caller. The
-compiler will generate a diagnostic for a function declared as ``[[noreturn]]``
-that appears to be capable of returning to its caller.
-
-
-carries_dependency
-------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-The ``carries_dependency`` attribute specifies dependency propagation into and
-out of functions.
-
-When specified on a function or Objective-C method, the ``carries_dependency``
-attribute means that the return value carries a dependency out of the function, 
-so that the implementation need not constrain ordering upon return from that
-function. Implementations of the function and its caller may choose to preserve
-dependencies instead of emitting memory ordering instructions such as fences.
-
-Note, this attribute does not change the meaning of the program, but may result
-in generation of more efficient code.
-
-
-enable_if
----------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-.. Note:: Some features of this attribute are experimental. The meaning of
-  multiple enable_if attributes on a single declaration is subject to change in
-  a future version of clang. Also, the ABI is not standardized and the name
-  mangling may change in future versions. To avoid that, use asm labels.
-
-The ``enable_if`` attribute can be placed on function declarations to control
-which overload is selected based on the values of the function's arguments.
-When combined with the ``overloadable`` attribute, this feature is also
-available in C.
-
-.. code-block:: c++
-
-  int isdigit(int c);
-  int isdigit(int c) __attribute__((enable_if(c <= -1 || c > 255, "chosen when 'c' is out of range"))) __attribute__((unavailable("'c' must have the value of an unsigned char or EOF")));
-  
-  void foo(char c) {
-    isdigit(c);
-    isdigit(10);
-    isdigit(-10);  // results in a compile-time error.
-  }
-
-The enable_if attribute takes two arguments, the first is an expression written
-in terms of the function parameters, the second is a string explaining why this
-overload candidate could not be selected to be displayed in diagnostics. The
-expression is part of the function signature for the purposes of determining
-whether it is a redeclaration (following the rules used when determining
-whether a C++ template specialization is ODR-equivalent), but is not part of
-the type.
-
-The enable_if expression is evaluated as if it were the body of a
-bool-returning constexpr function declared with the arguments of the function
-it is being applied to, then called with the parameters at the callsite. If the
-result is false or could not be determined through constant expression
-evaluation, then this overload will not be chosen and the provided string may
-be used in a diagnostic if the compile fails as a result.
-
-Because the enable_if expression is an unevaluated context, there are no global
-state changes, nor the ability to pass information from the enable_if
-expression to the function body. For example, suppose we want calls to
-strnlen(strbuf, maxlen) to resolve to strnlen_chk(strbuf, maxlen, size of
-strbuf) only if the size of strbuf can be determined:
-
-.. code-block:: c++
-
-  __attribute__((always_inline))
-  static inline size_t strnlen(const char *s, size_t maxlen)
-    __attribute__((overloadable))
-    __attribute__((enable_if(__builtin_object_size(s, 0) != -1))),
-                             "chosen when the buffer size is known but 'maxlen' is not")))
-  {
-    return strnlen_chk(s, maxlen, __builtin_object_size(s, 0));
-  }
-
-Multiple enable_if attributes may be applied to a single declaration. In this
-case, the enable_if expressions are evaluated from left to right in the
-following manner. First, the candidates whose enable_if expressions evaluate to
-false or cannot be evaluated are discarded. If the remaining candidates do not
-share ODR-equivalent enable_if expressions, the overload resolution is
-ambiguous. Otherwise, enable_if overload resolution continues with the next
-enable_if attribute on the candidates that have not been discarded and have
-remaining enable_if attributes. In this way, we pick the most specific
-overload out of a number of viable overloads using enable_if.
-
-.. code-block:: c++
-
-  void f() __attribute__((enable_if(true, "")));  // #1
-  void f() __attribute__((enable_if(true, ""))) __attribute__((enable_if(true, "")));  // #2
-  
-  void g(int i, int j) __attribute__((enable_if(i, "")));  // #1
-  void g(int i, int j) __attribute__((enable_if(j, ""))) __attribute__((enable_if(true)));  // #2
-
-In this example, a call to f() is always resolved to #2, as the first enable_if
-expression is ODR-equivalent for both declarations, but #1 does not have another
-enable_if expression to continue evaluating, so the next round of evaluation has
-only a single candidate. In a call to g(1, 1), the call is ambiguous even though
-#2 has more enable_if attributes, because the first enable_if expressions are
-not ODR-equivalent.
-
-Query for this feature with ``__has_attribute(enable_if)``.
-
-
-flatten (gnu::flatten)
-----------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-The ``flatten`` attribute causes calls within the attributed function to
-be inlined unless it is impossible to do so, for example if the body of the
-callee is unavailable or if the callee has the ``noinline`` attribute.
-
-
-format (gnu::format)
---------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-Clang supports the ``format`` attribute, which indicates that the function
-accepts a ``printf`` or ``scanf``-like format string and corresponding
-arguments or a ``va_list`` that contains these arguments.
-
-Please see `GCC documentation about format attribute
-<http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html>`_ to find details
-about attribute syntax.
-
-Clang implements two kinds of checks with this attribute.
-
-#. Clang checks that the function with the ``format`` attribute is called with
-   a format string that uses format specifiers that are allowed, and that
-   arguments match the format string.  This is the ``-Wformat`` warning, it is
-   on by default.
-
-#. Clang checks that the format string argument is a literal string.  This is
-   the ``-Wformat-nonliteral`` warning, it is off by default.
-
-   Clang implements this mostly the same way as GCC, but there is a difference
-   for functions that accept a ``va_list`` argument (for example, ``vprintf``).
-   GCC does not emit ``-Wformat-nonliteral`` warning for calls to such
-   fuctions.  Clang does not warn if the format string comes from a function
-   parameter, where the function is annotated with a compatible attribute,
-   otherwise it warns.  For example:
-
-   .. code-block:: c
-
-     __attribute__((__format__ (__scanf__, 1, 3)))
-     void foo(const char* s, char *buf, ...) {
-       va_list ap;
-       va_start(ap, buf);
-
-       vprintf(s, ap); // warning: format string is not a string literal
-     }
-
-   In this case we warn because ``s`` contains a format string for a
-   ``scanf``-like function, but it is passed to a ``printf``-like function.
-
-   If the attribute is removed, clang still warns, because the format string is
-   not a string literal.
-
-   Another example:
-
-   .. code-block:: c
-
-     __attribute__((__format__ (__printf__, 1, 3)))
-     void foo(const char* s, char *buf, ...) {
-       va_list ap;
-       va_start(ap, buf);
-
-       vprintf(s, ap); // warning
-     }
-
-   In this case Clang does not warn because the format string ``s`` and
-   the corresponding arguments are annotated.  If the arguments are
-   incorrect, the caller of ``foo`` will receive a warning.
-
-
-noduplicate (clang::noduplicate)
---------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-The ``noduplicate`` attribute can be placed on function declarations to control
-whether function calls to this function can be duplicated or not as a result of
-optimizations. This is required for the implementation of functions with
-certain special requirements, like the OpenCL "barrier" function, that might
-need to be run concurrently by all the threads that are executing in lockstep
-on the hardware. For example this attribute applied on the function
-"nodupfunc" in the code below avoids that:
-
-.. code-block:: c
-
-  void nodupfunc() __attribute__((noduplicate));
-  // Setting it as a C++11 attribute is also valid
-  // void nodupfunc() [[clang::noduplicate]];
-  void foo();
-  void bar();
-
-  nodupfunc();
-  if (a > n) {
-    foo();
-  } else {
-    bar();
-  }
-
-gets possibly modified by some optimizations into code similar to this:
-
-.. code-block:: c
-
-  if (a > n) {
-    nodupfunc();
-    foo();
-  } else {
-    nodupfunc();
-    bar();
-  }
-
-where the call to "nodupfunc" is duplicated and sunk into the two branches
-of the condition.
-
-
-no_sanitize_address (no_address_safety_analysis, gnu::no_address_safety_analysis, gnu::no_sanitize_address)
------------------------------------------------------------------------------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-.. _langext-address_sanitizer:
-
-Use ``__attribute__((no_sanitize_address))`` on a function declaration to
-specify that address safety instrumentation (e.g. AddressSanitizer) should
-not be applied to that function.
-
-
-no_sanitize_memory
-------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-.. _langext-memory_sanitizer:
-
-Use ``__attribute__((no_sanitize_memory))`` on a function declaration to
-specify that checks for uninitialized memory should not be inserted 
-(e.g. by MemorySanitizer). The function may still be instrumented by the tool
-to avoid false positives in other places.
-
-
-no_sanitize_thread
-------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-.. _langext-thread_sanitizer:
-
-Use ``__attribute__((no_sanitize_thread))`` on a function declaration to
-specify that checks for data races on plain (non-atomic) memory accesses should
-not be inserted by ThreadSanitizer. The function is still instrumented by the
-tool to avoid false positives and provide meaningful stack traces.
-
-
-no_split_stack (gnu::no_split_stack)
-------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-The ``no_split_stack`` attribute disables the emission of the split stack
-preamble for a particular function. It has no effect if ``-fsplit-stack``
-is not specified.
-
-
-objc_method_family
-------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Many methods in Objective-C have conventional meanings determined by their
-selectors. It is sometimes useful to be able to mark a method as having a
-particular conventional meaning despite not having the right selector, or as
-not having the conventional meaning that its selector would suggest. For these
-use cases, we provide an attribute to specifically describe the "method family"
-that a method belongs to.
-
-**Usage**: ``__attribute__((objc_method_family(X)))``, where ``X`` is one of
-``none``, ``alloc``, ``copy``, ``init``, ``mutableCopy``, or ``new``.  This
-attribute can only be placed at the end of a method declaration:
-
-.. code-block:: objc
-
-  - (NSString *)initMyStringValue __attribute__((objc_method_family(none)));
-
-Users who do not wish to change the conventional meaning of a method, and who
-merely want to document its non-standard retain and release semantics, should
-use the retaining behavior attributes (``ns_returns_retained``,
-``ns_returns_not_retained``, etc).
-
-Query for this feature with ``__has_attribute(objc_method_family)``.
-
-
-objc_requires_super
--------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Some Objective-C classes allow a subclass to override a particular method in a
-parent class but expect that the overriding method also calls the overridden
-method in the parent class. For these cases, we provide an attribute to
-designate that a method requires a "call to ``super``" in the overriding
-method in the subclass.
-
-**Usage**: ``__attribute__((objc_requires_super))``.  This attribute can only
-be placed at the end of a method declaration:
-
-.. code-block:: objc
-
-  - (void)foo __attribute__((objc_requires_super));
-
-This attribute can only be applied the method declarations within a class, and
-not a protocol.  Currently this attribute does not enforce any placement of
-where the call occurs in the overriding method (such as in the case of
-``-dealloc`` where the call must appear at the end).  It checks only that it
-exists.
-
-Note that on both OS X and iOS that the Foundation framework provides a
-convenience macro ``NS_REQUIRES_SUPER`` that provides syntactic sugar for this
-attribute:
-
-.. code-block:: objc
-
-  - (void)foo NS_REQUIRES_SUPER;
-
-This macro is conditionally defined depending on the compiler's support for
-this attribute.  If the compiler does not support the attribute the macro
-expands to nothing.
-
-Operationally, when a method has this annotation the compiler will warn if the
-implementation of an override in a subclass does not call super.  For example:
-
-.. code-block:: objc
-
-   warning: method possibly missing a [super AnnotMeth] call
-   - (void) AnnotMeth{};
-                      ^
-
-
-optnone (clang::optnone)
-------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-The ``optnone`` attribute suppresses essentially all optimizations
-on a function or method, regardless of the optimization level applied to
-the compilation unit as a whole.  This is particularly useful when you
-need to debug a particular function, but it is infeasible to build the
-entire application without optimization.  Avoiding optimization on the
-specified function can improve the quality of the debugging information
-for that function.
-
-This attribute is incompatible with the ``always_inline`` attribute.
-
-
-overloadable
-------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Clang provides support for C++ function overloading in C.  Function overloading
-in C is introduced using the ``overloadable`` attribute.  For example, one
-might provide several overloaded versions of a ``tgsin`` function that invokes
-the appropriate standard function computing the sine of a value with ``float``,
-``double``, or ``long double`` precision:
-
-.. code-block:: c
-
-  #include <math.h>
-  float __attribute__((overloadable)) tgsin(float x) { return sinf(x); }
-  double __attribute__((overloadable)) tgsin(double x) { return sin(x); }
-  long double __attribute__((overloadable)) tgsin(long double x) { return sinl(x); }
-
-Given these declarations, one can call ``tgsin`` with a ``float`` value to
-receive a ``float`` result, with a ``double`` to receive a ``double`` result,
-etc.  Function overloading in C follows the rules of C++ function overloading
-to pick the best overload given the call arguments, with a few C-specific
-semantics:
-
-* Conversion from ``float`` or ``double`` to ``long double`` is ranked as a
-  floating-point promotion (per C99) rather than as a floating-point conversion
-  (as in C++).
-
-* A conversion from a pointer of type ``T*`` to a pointer of type ``U*`` is
-  considered a pointer conversion (with conversion rank) if ``T`` and ``U`` are
-  compatible types.
-
-* A conversion from type ``T`` to a value of type ``U`` is permitted if ``T``
-  and ``U`` are compatible types.  This conversion is given "conversion" rank.
-
-The declaration of ``overloadable`` functions is restricted to function
-declarations and definitions.  Most importantly, if any function with a given
-name is given the ``overloadable`` attribute, then all function declarations
-and definitions with that name (and in that scope) must have the
-``overloadable`` attribute.  This rule even applies to redeclarations of
-functions whose original declaration had the ``overloadable`` attribute, e.g.,
-
-.. code-block:: c
-
-  int f(int) __attribute__((overloadable));
-  float f(float); // error: declaration of "f" must have the "overloadable" attribute
-
-  int g(int) __attribute__((overloadable));
-  int g(int) { } // error: redeclaration of "g" must also have the "overloadable" attribute
-
-Functions marked ``overloadable`` must have prototypes.  Therefore, the
-following code is ill-formed:
-
-.. code-block:: c
-
-  int h() __attribute__((overloadable)); // error: h does not have a prototype
-
-However, ``overloadable`` functions are allowed to use a ellipsis even if there
-are no named parameters (as is permitted in C++).  This feature is particularly
-useful when combined with the ``unavailable`` attribute:
-
-.. code-block:: c++
-
-  void honeypot(...) __attribute__((overloadable, unavailable)); // calling me is an error
-
-Functions declared with the ``overloadable`` attribute have their names mangled
-according to the same rules as C++ function names.  For example, the three
-``tgsin`` functions in our motivating example get the mangled names
-``_Z5tgsinf``, ``_Z5tgsind``, and ``_Z5tgsine``, respectively.  There are two
-caveats to this use of name mangling:
-
-* Future versions of Clang may change the name mangling of functions overloaded
-  in C, so you should not depend on an specific mangling.  To be completely
-  safe, we strongly urge the use of ``static inline`` with ``overloadable``
-  functions.
-
-* The ``overloadable`` attribute has almost no meaning when used in C++,
-  because names will already be mangled and functions are already overloadable.
-  However, when an ``overloadable`` function occurs within an ``extern "C"``
-  linkage specification, it's name *will* be mangled in the same way as it
-  would in C.
-
-Query for this feature with ``__has_extension(attribute_overloadable)``.
-
-
-pcs (gnu::pcs)
---------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-On ARM targets, this can attribute can be used to select calling conventions,
-similar to ``stdcall`` on x86. Valid parameter values are "aapcs" and
-"aapcs-vfp".
-
-
-release_capability (release_shared_capability, clang::release_capability, clang::release_shared_capability)
------------------------------------------------------------------------------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-Marks a function as releasing a capability.
-
-
-try_acquire_capability (try_acquire_shared_capability, clang::try_acquire_capability, clang::try_acquire_shared_capability)
----------------------------------------------------------------------------------------------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-Marks a function that attempts to acquire a capability. This function may fail to
-actually acquire the capability; they accept a Boolean value determining
-whether acquiring the capability means success (true), or failing to acquire
-the capability means success (false).
-
-
-Variable Attributes
-===================
-
-
-section (gnu::section, __declspec(allocate))
---------------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","X",""
-
-The ``section`` attribute allows you to specify a specific section a
-global variable or function should be in after translation.
-
-
-tls_model (gnu::tls_model)
---------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","X","",""
-
-The ``tls_model`` attribute allows you to specify which thread-local storage
-model to use. It accepts the following strings:
-
-* global-dynamic
-* local-dynamic
-* initial-exec
-* local-exec
-
-TLS models are mutually exclusive.
-
-
-thread
-------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "","","X",""
-
-The ``__declspec(thread)`` attribute declares a variable with thread local
-storage.  It is available under the ``-fms-extensions`` flag for MSVC
-compatibility.  Documentation for the Visual C++ attribute is available on MSDN_.
-
-.. _MSDN: http://msdn.microsoft.com/en-us/library/9w1sdazb.aspx
-
-In Clang, ``__declspec(thread)`` is generally equivalent in functionality to the
-GNU ``__thread`` keyword.  The variable must not have a destructor and must have
-a constant initializer, if any.  The attribute only applies to variables
-declared with static storage duration, such as globals, class static data
-members, and static locals.
-
-
-Type Attributes
-===============
-
-
-__single_inhertiance, __multiple_inheritance, __virtual_inheritance
--------------------------------------------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "","","","X"
-
-This collection of keywords is enabled under ``-fms-extensions`` and controls
-the pointer-to-member representation used on ``*-*-win32`` targets.
-
-The ``*-*-win32`` targets utilize a pointer-to-member representation which
-varies in size and alignment depending on the definition of the underlying
-class.
-
-However, this is problematic when a forward declaration is only available and
-no definition has been made yet.  In such cases, Clang is forced to utilize the
-most general representation that is available to it.
-
-These keywords make it possible to use a pointer-to-member representation other
-than the most general one regardless of whether or not the definition will ever
-be present in the current translation unit.
-
-This family of keywords belong between the ``class-key`` and ``class-name``:
-
-.. code-block:: c++
-
-  struct __single_inheritance S;
-  int S::*i;
-  struct S {};
-
-This keyword can be applied to class templates but only has an effect when used
-on full specializations:
-
-.. code-block:: c++
-
-  template <typename T, typename U> struct __single_inheritance A; // warning: inheritance model ignored on primary template
-  template <typename T> struct __multiple_inheritance A<T, T>; // warning: inheritance model ignored on partial specialization
-  template <> struct __single_inheritance A<int, float>;
-
-Note that choosing an inheritance model less general than strictly necessary is
-an error:
-
-.. code-block:: c++
-
-  struct __multiple_inheritance S; // error: inheritance model does not match definition
-  int S::*i;
-  struct S {};
-
-
-Statement Attributes
-====================
-
-
-fallthrough (clang::fallthrough)
---------------------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "","X","",""
-
-The ``clang::fallthrough`` attribute is used along with the
-``-Wimplicit-fallthrough`` argument to annotate intentional fall-through
-between switch labels.  It can only be applied to a null statement placed at a
-point of execution between any statement and the next switch label.  It is
-common to mark these places with a specific comment, but this attribute is
-meant to replace comments with a more strict annotation, which can be checked
-by the compiler.  This attribute doesn't change semantics of the code and can
-be used wherever an intended fall-through occurs.  It is designed to mimic
-control-flow statements like ``break;``, so it can be placed in most places
-where ``break;`` can, but only if there are no statements on the execution path
-between it and the next switch label.
-
-Here is an example:
-
-.. code-block:: c++
-
-  // compile with -Wimplicit-fallthrough
-  switch (n) {
-  case 22:
-  case 33:  // no warning: no statements between case labels
-    f();
-  case 44:  // warning: unannotated fall-through
-    g();
-    [[clang::fallthrough]];
-  case 55:  // no warning
-    if (x) {
-      h();
-      break;
-    }
-    else {
-      i();
-      [[clang::fallthrough]];
-    }
-  case 66:  // no warning
-    p();
-    [[clang::fallthrough]]; // warning: fallthrough annotation does not
-                            //          directly precede case label
-    q();
-  case 77:  // warning: unannotated fall-through
-    r();
-  }
-
-
-Consumed Annotation Checking
-============================
-Clang supports additional attributes for checking basic resource management
-properties, specifically for unique objects that have a single owning reference.
-The following attributes are currently supported, although **the implementation
-for these annotations is currently in development and are subject to change.**
-
-callable_when
--------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Use ``__attribute__((callable_when(...)))`` to indicate what states a method
-may be called in.  Valid states are unconsumed, consumed, or unknown.  Each
-argument to this attribute must be a quoted string.  E.g.:
-
-``__attribute__((callable_when("unconsumed", "unknown")))``
-
-
-consumable
-----------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Each ``class`` that uses any of the typestate annotations must first be marked
-using the ``consumable`` attribute.  Failure to do so will result in a warning.
-
-This attribute accepts a single parameter that must be one of the following:
-``unknown``, ``consumed``, or ``unconsumed``.
-
-
-param_typestate
----------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-This attribute specifies expectations about function parameters.  Calls to an
-function with annotated parameters will issue a warning if the corresponding
-argument isn't in the expected state.  The attribute is also used to set the
-initial state of the parameter when analyzing the function's body.
-
-
-return_typestate
-----------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-The ``return_typestate`` attribute can be applied to functions or parameters.
-When applied to a function the attribute specifies the state of the returned
-value.  The function's body is checked to ensure that it always returns a value
-in the specified state.  On the caller side, values returned by the annotated
-function are initialized to the given state.
-
-When applied to a function parameter it modifies the state of an argument after
-a call to the function returns.  The function's body is checked to ensure that
-the parameter is in the expected state before returning.
-
-
-set_typestate
--------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Annotate methods that transition an object into a new state with
-``__attribute__((set_typestate(new_state)))``.  The new new state must be
-unconsumed, consumed, or unknown.
-
-
-test_typestate
---------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Use ``__attribute__((test_typestate(tested_state)))`` to indicate that a method
-returns true if the object is in the specified state..
-
-
-Type Safety Checking
-====================
-Clang supports additional attributes to enable checking type safety properties
-that can't be enforced by the C type system.  Use cases include:
-
-* MPI library implementations, where these attributes enable checking that
-  the buffer type matches the passed ``MPI_Datatype``;
-* for HDF5 library there is a similar use case to MPI;
-* checking types of variadic functions' arguments for functions like
-  ``fcntl()`` and ``ioctl()``.
-
-You can detect support for these attributes with ``__has_attribute()``.  For
-example:
-
-.. code-block:: c++
-
-  #if defined(__has_attribute)
-  #  if __has_attribute(argument_with_type_tag) && \
-        __has_attribute(pointer_with_type_tag) && \
-        __has_attribute(type_tag_for_datatype)
-  #    define ATTR_MPI_PWT(buffer_idx, type_idx) __attribute__((pointer_with_type_tag(mpi,buffer_idx,type_idx)))
-  /* ... other macros ...  */
-  #  endif
-  #endif
-
-  #if !defined(ATTR_MPI_PWT)
-  # define ATTR_MPI_PWT(buffer_idx, type_idx)
-  #endif
-
-  int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */)
-      ATTR_MPI_PWT(1,3);
-
-argument_with_type_tag
-----------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Use ``__attribute__((argument_with_type_tag(arg_kind, arg_idx,
-type_tag_idx)))`` on a function declaration to specify that the function
-accepts a type tag that determines the type of some other argument.
-``arg_kind`` is an identifier that should be used when annotating all
-applicable type tags.
-
-This attribute is primarily useful for checking arguments of variadic functions
-(``pointer_with_type_tag`` can be used in most non-variadic cases).
-
-For example:
-
-.. code-block:: c++
-
-  int fcntl(int fd, int cmd, ...)
-      __attribute__(( argument_with_type_tag(fcntl,3,2) ));
-
-
-pointer_with_type_tag
----------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Use ``__attribute__((pointer_with_type_tag(ptr_kind, ptr_idx, type_tag_idx)))``
-on a function declaration to specify that the function accepts a type tag that
-determines the pointee type of some other pointer argument.
-
-For example:
-
-.. code-block:: c++
-
-  int MPI_Send(void *buf, int count, MPI_Datatype datatype /*, other args omitted */)
-      __attribute__(( pointer_with_type_tag(mpi,1,3) ));
-
-
-type_tag_for_datatype
----------------------
-.. csv-table:: Supported Syntaxes
-   :header: "GNU", "C++11", "__declspec", "Keyword"
-
-   "X","","",""
-
-Clang supports annotating type tags of two forms.
-
-* **Type tag that is an expression containing a reference to some declared
-  identifier.** Use ``__attribute__((type_tag_for_datatype(kind, type)))`` on a
-  declaration with that identifier:
-
-  .. code-block:: c++
-
-    extern struct mpi_datatype mpi_datatype_int
-        __attribute__(( type_tag_for_datatype(mpi,int) ));
-    #define MPI_INT ((MPI_Datatype) &mpi_datatype_int)
-
-* **Type tag that is an integral literal.** Introduce a ``static const``
-  variable with a corresponding initializer value and attach
-  ``__attribute__((type_tag_for_datatype(kind, type)))`` on that declaration,
-  for example:
-
-  .. code-block:: c++
-
-    #define MPI_INT ((MPI_Datatype) 42)
-    static const MPI_Datatype mpi_datatype_int
-        __attribute__(( type_tag_for_datatype(mpi,int) )) = 42
-
-The attribute also accepts an optional third argument that determines how the
-expression is compared to the type tag.  There are two supported flags:
-
-* ``layout_compatible`` will cause types to be compared according to
-  layout-compatibility rules (C++11 [class.mem] p 17, 18).  This is
-  implemented to support annotating types like ``MPI_DOUBLE_INT``.
-
-  For example:
-
-  .. code-block:: c++
-
-    /* In mpi.h */
-    struct internal_mpi_double_int { double d; int i; };
-    extern struct mpi_datatype mpi_datatype_double_int
-        __attribute__(( type_tag_for_datatype(mpi, struct internal_mpi_double_int, layout_compatible) ));
-
-    #define MPI_DOUBLE_INT ((MPI_Datatype) &mpi_datatype_double_int)
-
-    /* In user code */
-    struct my_pair { double a; int b; };
-    struct my_pair *buffer;
-    MPI_Send(buffer, 1, MPI_DOUBLE_INT /*, ...  */); // no warning
-
-    struct my_int_pair { int a; int b; }
-    struct my_int_pair *buffer2;
-    MPI_Send(buffer2, 1, MPI_DOUBLE_INT /*, ...  */); // warning: actual buffer element
-                                                      // type 'struct my_int_pair'
-                                                      // doesn't match specified MPI_Datatype
-
-* ``must_be_null`` specifies that the expression should be a null pointer
-  constant, for example:
-
-  .. code-block:: c++
-
-    /* In mpi.h */
-    extern struct mpi_datatype mpi_datatype_null
-        __attribute__(( type_tag_for_datatype(mpi, void, must_be_null) ));
-
-    #define MPI_DATATYPE_NULL ((MPI_Datatype) &mpi_datatype_null)
-
-    /* In user code */
-    MPI_Send(buffer, 1, MPI_DATATYPE_NULL /*, ...  */); // warning: MPI_DATATYPE_NULL
-                                                        // was specified but buffer
-                                                        // is not a null pointer
-
-
+===================
\ No newline at end of file
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 20a1396..47bb2e0 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -87,5 +87,8 @@ if (LLVM_ENABLE_SPHINX)
     if (${SPHINX_OUTPUT_HTML})
       add_sphinx_target(html clang)
     endif()
+    if (${SPHINX_OUTPUT_MAN})
+      add_sphinx_target(man clang)
+    endif()
   endif()
 endif()
diff --git a/docs/ClangFormatStyleOptions.rst b/docs/ClangFormatStyleOptions.rst
index f4c4c8c..ab7bd87 100644
--- a/docs/ClangFormatStyleOptions.rst
+++ b/docs/ClangFormatStyleOptions.rst
@@ -218,12 +218,19 @@ the configuration (without a prefix: ``Auto``).
   If ``true``, ``while (true) continue;`` can be put on a
   single line.
 
-**AlwaysBreakAfterDefinitionReturnType** (``bool``)
-  If ``true``, always break after function definition return types.
+**AlwaysBreakAfterDefinitionReturnType** (``DefinitionReturnTypeBreakingStyle``)
+  The function definition return type breaking style to use.
+
+  Possible values:
+
+  * ``DRTBS_None`` (in configuration: ``None``)
+    Break after return type automatically.
+    ``PenaltyReturnTypeOnItsOwnLine`` is taken into account.
+  * ``DRTBS_All`` (in configuration: ``All``)
+    Always break after the return type.
+  * ``DRTBS_TopLevel`` (in configuration: ``TopLevel``)
+    Always break after the return types of top level functions.
 
-  More truthfully called 'break before the identifier following the type
-  in a function definition'. PenaltyReturnTypeOnItsOwnLine becomes
-  irrelevant.
 
 **AlwaysBreakBeforeMultilineStrings** (``bool``)
   If ``true``, always break before multiline string literals.
@@ -327,7 +334,7 @@ the configuration (without a prefix: ``Auto``).
   alignment of & and \*. ``PointerAlignment`` is then used only as fallback.
 
 **DisableFormat** (``bool``)
-  Disables formatting at all.
+  Disables formatting completely.
 
 **ExperimentalAutoDetectBinPacking** (``bool``)
   If ``true``, clang-format detects whether function calls and
diff --git a/docs/CommandGuide/clang.rst b/docs/CommandGuide/clang.rst
new file mode 100644
index 0000000..39dbe02
--- /dev/null
+++ b/docs/CommandGuide/clang.rst
@@ -0,0 +1,484 @@
+clang - the Clang C, C++, and Objective-C compiler
+==================================================
+
+SYNOPSIS
+--------
+
+:program:`clang` [*options*] *filename ...*
+
+DESCRIPTION
+-----------
+
+:program:`clang` is a C, C++, and Objective-C compiler which encompasses
+preprocessing, parsing, optimization, code generation, assembly, and linking.
+Depending on which high-level mode setting is passed, Clang will stop before
+doing a full link.  While Clang is highly integrated, it is important to
+understand the stages of compilation, to understand how to invoke it.  These
+stages are:
+
+Driver
+    The clang executable is actually a small driver which controls the overall
+    execution of other tools such as the compiler, assembler and linker.
+    Typically you do not need to interact with the driver, but you
+    transparently use it to run the other tools.
+
+Preprocessing
+    This stage handles tokenization of the input source file, macro expansion,
+    #include expansion and handling of other preprocessor directives.  The
+    output of this stage is typically called a ".i" (for C), ".ii" (for C++),
+    ".mi" (for Objective-C), or ".mii" (for Objective-C++) file.
+
+Parsing and Semantic Analysis
+    This stage parses the input file, translating preprocessor tokens into a
+    parse tree.  Once in the form of a parse tree, it applies semantic
+    analysis to compute types for expressions as well and determine whether
+    the code is well formed. This stage is responsible for generating most of
+    the compiler warnings as well as parse errors. The output of this stage is
+    an "Abstract Syntax Tree" (AST).
+
+Code Generation and Optimization
+    This stage translates an AST into low-level intermediate code (known as
+    "LLVM IR") and ultimately to machine code.  This phase is responsible for
+    optimizing the generated code and handling target-specific code generation.
+    The output of this stage is typically called a ".s" file or "assembly" file.
+
+    Clang also supports the use of an integrated assembler, in which the code
+    generator produces object files directly. This avoids the overhead of
+    generating the ".s" file and of calling the target assembler.
+
+Assembler
+    This stage runs the target assembler to translate the output of the
+    compiler into a target object file. The output of this stage is typically
+    called a ".o" file or "object" file.
+
+Linker
+    This stage runs the target linker to merge multiple object files into an
+    executable or dynamic library. The output of this stage is typically called
+    an "a.out", ".dylib" or ".so" file.
+
+:program:`Clang Static Analyzer`
+
+The Clang Static Analyzer is a tool that scans source code to try to find bugs
+through code analysis.  This tool uses many parts of Clang and is built into
+the same driver.  Please see <http://clang-analyzer.llvm.org> for more details
+on how to use the static analyzer.
+
+OPTIONS
+-------
+
+Stage Selection Options
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. option:: -E
+
+ Run the preprocessor stage.
+
+.. option:: -fsyntax-only
+
+ Run the preprocessor, parser and type checking stages.
+
+.. option:: -S
+
+ Run the previous stages as well as LLVM generation and optimization stages
+ and target-specific code generation, producing an assembly file.
+
+.. option:: -c
+
+ Run all of the above, plus the assembler, generating a target ".o" object file.
+
+.. option:: no stage selection option
+
+ If no stage selection option is specified, all stages above are run, and the
+ linker is run to combine the results into an executable or shared library.
+
+Language Selection and Mode Options
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. option:: -x <language>
+
+ Treat subsequent input files as having type language.
+
+.. option:: -std=<language>
+
+ Specify the language standard to compile for.
+
+.. option:: -stdlib=<library>
+
+ Specify the C++ standard library to use; supported options are libstdc++ and
+ libc++.
+
+.. option:: -ansi
+
+ Same as -std=c89.
+
+.. option:: -ObjC, -ObjC++
+
+ Treat source input files as Objective-C and Object-C++ inputs respectively.
+
+.. option:: -trigraphs
+
+ Enable trigraphs.
+
+.. option:: -ffreestanding
+
+ Indicate that the file should be compiled for a freestanding, not a hosted,
+ environment.
+
+.. option:: -fno-builtin
+
+ Disable special handling and optimizations of builtin functions like
+ :c:func:`strlen` and :c:func:`malloc`.
+
+.. option:: -fmath-errno
+
+ Indicate that math functions should be treated as updating :c:data:`errno`.
+
+.. option:: -fpascal-strings
+
+ Enable support for Pascal-style strings with "\\pfoo".
+
+.. option:: -fms-extensions
+
+ Enable support for Microsoft extensions.
+
+.. option:: -fmsc-version=
+
+ Set _MSC_VER. Defaults to 1300 on Windows. Not set otherwise.
+
+.. option:: -fborland-extensions
+
+ Enable support for Borland extensions.
+
+.. option:: -fwritable-strings
+
+ Make all string literals default to writable.  This disables uniquing of
+ strings and other optimizations.
+
+.. option:: -flax-vector-conversions
+
+ Allow loose type checking rules for implicit vector conversions.
+
+.. option:: -fblocks
+
+ Enable the "Blocks" language feature.
+
+.. option:: -fobjc-gc-only
+
+ Indicate that Objective-C code should be compiled in GC-only mode, which only
+ works when Objective-C Garbage Collection is enabled.
+
+.. option:: -fobjc-gc
+
+ Indicate that Objective-C code should be compiled in hybrid-GC mode, which
+ works with both GC and non-GC mode.
+
+.. option:: -fobjc-abi-version=version
+
+ Select the Objective-C ABI version to use. Available versions are 1 (legacy
+ "fragile" ABI), 2 (non-fragile ABI 1), and 3 (non-fragile ABI 2).
+
+.. option:: -fobjc-nonfragile-abi-version=<version>
+
+ Select the Objective-C non-fragile ABI version to use by default. This will
+ only be used as the Objective-C ABI when the non-fragile ABI is enabled
+ (either via :option:`-fobjc-nonfragile-abi`, or because it is the platform
+ default).
+
+.. option:: -fobjc-nonfragile-abi
+
+ Enable use of the Objective-C non-fragile ABI. On platforms for which this is
+ the default ABI, it can be disabled with :option:`-fno-objc-nonfragile-abi`.
+
+Target Selection Options
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Clang fully supports cross compilation as an inherent part of its design.
+Depending on how your version of Clang is configured, it may have support for a
+number of cross compilers, or may only support a native target.
+
+.. option:: -arch <architecture>
+
+  Specify the architecture to build for.
+
+.. option:: -mmacosx-version-min=<version>
+
+  When building for Mac OS X, specify the minimum version supported by your
+  application.
+
+.. option:: -miphoneos-version-min
+
+  When building for iPhone OS, specify the minimum version supported by your
+  application.
+
+.. option:: -march=<cpu>
+
+  Specify that Clang should generate code for a specific processor family
+  member and later.  For example, if you specify -march=i486, the compiler is
+  allowed to generate instructions that are valid on i486 and later processors,
+  but which may not exist on earlier ones.
+
+
+Code Generation Options
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. option:: -O0, -O1, -O2, -O3, -Ofast, -Os, -Oz, -O, -O4
+
+  Specify which optimization level to use:
+
+    :option:`-O0` Means "no optimization": this level compiles the fastest and
+    generates the most debuggable code.
+
+    :option:`-O1` Somewhere between :option:`-O0` and :option:`-O2`.
+
+    :option:`-O2` Moderate level of optimization which enables most
+    optimizations.
+
+    :option:`-O3` Like :option:`-O2`, except that it enables optimizations that
+    take longer to perform or that may generate larger code (in an attempt to
+    make the program run faster).
+
+    :option:`-Ofast` Enables all the optimizations from :option:`-O3` along
+    with other aggressive optimizations that may violate strict compliance with
+    language standards.
+
+    :option:`-Os` Like :option:`-O2` with extra optimizations to reduce code
+    size.
+
+    :option:`-Oz` Like :option:`-Os` (and thus :option:`-O2`), but reduces code
+    size further.
+
+    :option:`-O` Equivalent to :option:`-O2`.
+
+    :option:`-O4` and higher
+
+      Currently equivalent to :option:`-O3`
+
+.. option:: -g
+
+  Generate debug information.  Note that Clang debug information works best at -O0.
+
+.. option:: -fstandalone-debug -fno-standalone-debug
+
+  Clang supports a number of optimizations to reduce the size of debug
+  information in the binary. They work based on the assumption that the
+  debug type information can be spread out over multiple compilation units.
+  For instance, Clang will not emit type definitions for types that are not
+  needed by a module and could be replaced with a forward declaration.
+  Further, Clang will only emit type info for a dynamic C++ class in the
+  module that contains the vtable for the class.
+
+  The :option:`-fstandalone-debug` option turns off these optimizations.
+  This is useful when working with 3rd-party libraries that don't come with
+  debug information.  This is the default on Darwin.  Note that Clang will
+  never emit type information for types that are not referenced at all by the
+  program.
+
+.. option:: -fexceptions
+
+  Enable generation of unwind information. This allows exceptions to be thrown
+  through Clang compiled stack frames.  This is on by default in x86-64.
+
+.. option:: -ftrapv
+
+  Generate code to catch integer overflow errors.  Signed integer overflow is
+  undefined in C. With this flag, extra code is generated to detect this and
+  abort when it happens.
+
+.. option:: -fvisibility
+
+  This flag sets the default visibility level.
+
+.. option:: -fcommon
+
+  This flag specifies that variables without initializers get common linkage.
+  It can be disabled with :option:`-fno-common`.
+
+.. option:: -ftls-model=<model>
+
+  Set the default thread-local storage (TLS) model to use for thread-local
+  variables. Valid values are: "global-dynamic", "local-dynamic",
+  "initial-exec" and "local-exec". The default is "global-dynamic". The default
+  model can be overridden with the tls_model attribute. The compiler will try
+  to choose a more efficient model if possible.
+
+.. option:: -flto, -emit-llvm
+
+  Generate output files in LLVM formats, suitable for link time optimization.
+  When used with :option:`-S` this generates LLVM intermediate language
+  assembly files, otherwise this generates LLVM bitcode format object files
+  (which may be passed to the linker depending on the stage selection options).
+
+Driver Options
+~~~~~~~~~~~~~~
+
+.. option:: -###
+
+  Print (but do not run) the commands to run for this compilation.
+
+.. option:: --help
+
+  Display available options.
+
+.. option:: -Qunused-arguments
+
+  Do not emit any warnings for unused driver arguments.
+
+.. option:: -Wa,<args>
+
+  Pass the comma separated arguments in args to the assembler.
+
+.. option:: -Wl,<args>
+
+  Pass the comma separated arguments in args to the linker.
+
+.. option:: -Wp,<args>
+
+  Pass the comma separated arguments in args to the preprocessor.
+
+.. option:: -Xanalyzer <arg>
+
+  Pass arg to the static analyzer.
+
+.. option:: -Xassembler <arg>
+
+  Pass arg to the assembler.
+
+.. option:: -Xlinker <arg>
+
+  Pass arg to the linker.
+
+.. option:: -Xpreprocessor <arg>
+
+  Pass arg to the preprocessor.
+
+.. option:: -o <file>
+
+  Write output to file.
+
+.. option:: -print-file-name=<file>
+
+  Print the full library path of file.
+
+.. option:: -print-libgcc-file-name
+
+  Print the library path for "libgcc.a".
+
+.. option:: -print-prog-name=<name>
+
+  Print the full program path of name.
+
+.. option:: -print-search-dirs
+
+  Print the paths used for finding libraries and programs.
+
+.. option:: -save-temps
+
+  Save intermediate compilation results.
+
+.. option:: -integrated-as, -no-integrated-as
+
+  Used to enable and disable, respectively, the use of the integrated
+  assembler. Whether the integrated assembler is on by default is target
+  dependent.
+
+.. option:: -time
+
+  Time individual commands.
+
+.. option:: -ftime-report
+
+  Print timing summary of each stage of compilation.
+
+.. option:: -v
+
+  Show commands to run and use verbose output.
+
+
+Diagnostics Options
+~~~~~~~~~~~~~~~~~~~
+
+.. option:: -fshow-column, -fshow-source-location, -fcaret-diagnostics, -fdiagnostics-fixit-info, -fdiagnostics-parseable-fixits, -fdiagnostics-print-source-range-info, -fprint-source-range-info, -fdiagnostics-show-option, -fmessage-length
+
+  These options control how Clang prints out information about diagnostics
+  (errors and warnings). Please see the Clang User's Manual for more information.
+
+Preprocessor Options
+~~~~~~~~~~~~~~~~~~~~
+
+.. option:: -D<macroname>=<value>
+
+  Adds an implicit #define into the predefines buffer which is read before the
+  source file is preprocessed.
+
+.. option:: -U<macroname>
+
+  Adds an implicit #undef into the predefines buffer which is read before the
+  source file is preprocessed.
+
+.. option:: -include <filename>
+
+  Adds an implicit #include into the predefines buffer which is read before the
+  source file is preprocessed.
+
+.. option:: -I<directory>
+
+  Add the specified directory to the search path for include files.
+
+.. option:: -F<directory>
+
+  Add the specified directory to the search path for framework include files.
+
+.. option:: -nostdinc
+
+  Do not search the standard system directories or compiler builtin directories
+  for include files.
+
+.. option:: -nostdlibinc
+
+  Do not search the standard system directories for include files, but do
+  search compiler builtin include directories.
+
+.. option:: -nobuiltininc
+
+  Do not search clang's builtin directory for include files.
+
+
+ENVIRONMENT
+-----------
+
+.. envvar:: TMPDIR, TEMP, TMP
+
+  These environment variables are checked, in order, for the location to write
+  temporary files used during the compilation process.
+
+.. envvar:: CPATH
+
+  If this environment variable is present, it is treated as a delimited list of
+  paths to be added to the default system include path list. The delimiter is
+  the platform dependent delimiter, as used in the PATH environment variable.
+
+  Empty components in the environment variable are ignored.
+
+.. envvar:: C_INCLUDE_PATH, OBJC_INCLUDE_PATH, CPLUS_INCLUDE_PATH, OBJCPLUS_INCLUDE_PATH
+
+  These environment variables specify additional paths, as for :envvar:`CPATH`, which are
+  only used when processing the appropriate language.
+
+.. envvar:: MACOSX_DEPLOYMENT_TARGET
+
+  If :option:`-mmacosx-version-min` is unspecified, the default deployment
+  target is read from this environment variable. This option only affects
+  Darwin targets.
+
+BUGS
+----
+
+To report bugs, please visit <http://llvm.org/bugs/>.  Most bug reports should
+include preprocessed source files (use the :option:`-E` option) and the full
+output of the compiler, along with information to reproduce.
+
+SEE ALSO
+--------
+
+:manpage:`as(1)`, :manpage:`ld(1)`
+
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
new file mode 100644
index 0000000..826ed97
--- /dev/null
+++ b/docs/CommandGuide/index.rst
@@ -0,0 +1,17 @@
+Clang "man" pages
+-----------------
+
+The following documents are command descriptions for all of the Clang tools.
+These pages describe how to use the Clang commands and what their options are.
+Note that these pages do not describe all of the options available for all
+tools. To get a complete listing, pass the ``--help`` (general options) or
+``--help-hidden`` (general and debugging options) arguments to the tool you are
+interested in.
+
+Basic Commands
+~~~~~~~~~~~~~~
+
+.. toctree::
+   :maxdepth: 1
+
+   clang
diff --git a/docs/DriverInternals.rst b/docs/DriverInternals.rst
index 4cd2e5d..6bc5f7d 100644
--- a/docs/DriverInternals.rst
+++ b/docs/DriverInternals.rst
@@ -155,7 +155,7 @@ The driver functionality is conceptually divided into five stages:
    Subsequent stages should rarely, if ever, need to do any string
    processing.
 
-#. **Pipeline: Compilation Job Construction**
+#. **Pipeline: Compilation Action Construction**
 
    Once the arguments are parsed, the tree of subprocess jobs needed for
    the desired compilation sequence are constructed. This involves
@@ -266,7 +266,7 @@ The driver functionality is conceptually divided into five stages:
 #. **Translate: Tool Specific Argument Translation**
 
    Once a Tool has been selected to perform a particular Action, the
-   Tool must construct concrete Jobs which will be executed during
+   Tool must construct concrete Commands which will be executed during
    compilation. The main work is in translating from the gcc style
    command line options to whatever options the subprocess expects.
 
@@ -280,7 +280,7 @@ The driver functionality is conceptually divided into five stages:
    last of arguments corresponding to some option, or all arguments for
    an option.
 
-   The result of this stage is a list of Jobs (executable paths and
+   The result of this stage is a list of Commands (executable paths and
    argument strings) to execute.
 
 #. **Execute**
diff --git a/docs/LeakSanitizer.rst b/docs/LeakSanitizer.rst
index b1071ef..d4b4fa0 100644
--- a/docs/LeakSanitizer.rst
+++ b/docs/LeakSanitizer.rst
@@ -17,7 +17,8 @@ only, at a minimal performance cost.
 Current status
 ==============
 
-LeakSanitizer is experimental and supported only on x86\_64 Linux.
+LeakSanitizer is turned on by default, but it is only supported on x86\_64
+Linux.
 
 The combined mode has been tested on fairly large software projects. The
 stand-alone mode has received much less testing.
diff --git a/docs/LibASTMatchersReference.html b/docs/LibASTMatchersReference.html
index 74bbf9e..a555bb3 100644
--- a/docs/LibASTMatchersReference.html
+++ b/docs/LibASTMatchersReference.html
@@ -1434,6 +1434,16 @@ Usable as: Matcher&lt<a href="http://clang.llvm.org/doxygen/classclang_1_1Charac
            Matcher&lt<a href="http://clang.llvm.org/doxygen/classclang_1_1FloatingLiteral.html">FloatingLiteral</a>&gt;, Matcher&lt<a href="http://clang.llvm.org/doxygen/classclang_1_1IntegerLiteral.html">IntegerLiteral</a>&gt;
 </pre></td></tr>
 
+<tr><td>Matcher&lt<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXCatchStmt.html">CXXCatchStmt</a>&gt;</td><td class="name" onclick="toggle('isCatchAll1')"><a name="isCatchAll1Anchor">isCatchAll</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isCatchAll1"><pre>Matches a C++ catch statement that has a handler that catches any exception type.
+
+Example matches catch(...) (matcher = catchStmt(isCatchAll()))
+  try {
+    // ...
+  } catch(...) {
+  }
+</pre></td></tr>
+
 
 <tr><td>Matcher&lt<a href="http://clang.llvm.org/doxygen/classclang_1_1CXXConstructExpr.html">CXXConstructExpr</a>&gt;</td><td class="name" onclick="toggle('argumentCountIs1')"><a name="argumentCountIs1Anchor">argumentCountIs</a></td><td>unsigned N</td></tr>
 <tr><td colspan="4" class="doc" id="argumentCountIs1"><pre>Checks that a call expression or a constructor call expression has
diff --git a/docs/Makefile b/docs/Makefile
index 96978d8..a409cf6 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -8,7 +8,6 @@
 ##===----------------------------------------------------------------------===##
 
 CLANG_LEVEL := ..
-DIRS       := tools
 
 ifdef BUILD_FOR_WEBSITE
 PROJ_OBJ_DIR = .
diff --git a/docs/ObjectiveCLiterals.rst b/docs/ObjectiveCLiterals.rst
index 8907c1e..9fe7f66 100644
--- a/docs/ObjectiveCLiterals.rst
+++ b/docs/ObjectiveCLiterals.rst
@@ -119,8 +119,8 @@ Objective-C provides a new syntax for boxing C expressions:
 
     @( <expression> )
 
-Expressions of scalar (numeric, enumerated, BOOL) and C string pointer
-types are supported:
+Expressions of scalar (numeric, enumerated, BOOL), C string pointer
+and some C structures (via NSValue) are supported:
 
 .. code-block:: objc
 
@@ -136,6 +136,12 @@ types are supported:
     NSString *path = @(getenv("PATH"));       // [NSString stringWithUTF8String:(getenv("PATH"))]
     NSArray *pathComponents = [path componentsSeparatedByString:@":"];
 
+    // structs.
+    NSValue *center = @(view.center);         // Point p = view.center;
+                                              // [NSValue valueWithBytes:&p objCType:@encode(Point)];
+    NSValue *frame = @(view.frame);           // Rect r = view.frame;
+                                              // [NSValue valueWithBytes:&r objCType:@encode(Rect)];
+
 Boxed Enums
 -----------
 
@@ -218,6 +224,42 @@ character data is valid. Passing ``NULL`` as the character pointer will
 raise an exception at runtime. When possible, the compiler will reject
 ``NULL`` character pointers used in boxed expressions.
 
+Boxed C Structures
+------------------
+
+Boxed expressions support construction of NSValue objects.
+It said that C structures can be used, the only requirement is:
+structure should be marked with ``objc_boxable`` attribute.
+To support older version of frameworks and/or third-party libraries
+you may need to add the attribute via ``typedef``.
+
+.. code-block:: objc
+
+    struct __attribute__((objc_boxable)) Point {
+        // ...
+    };
+
+    typedef struct __attribute__((objc_boxable)) _Size {
+        // ...
+    } Size;
+
+    typedef struct _Rect {
+        // ...
+    } Rect;
+
+    struct Point p;
+    NSValue *point = @(p);          // ok
+    Size s;
+    NSValue *size = @(s);           // ok
+
+    Rect r;
+    NSValue *bad_rect = @(r);       // error
+
+    typedef struct __attribute__((objc_boxable)) _Rect Rect;
+
+    NSValue *good_rect = @(r);      // ok
+
+
 Container Literals
 ==================
 
@@ -539,6 +581,22 @@ checks. Here are examples of their use:
         }
     #endif
 
+    #if __has_attribute(objc_boxable)
+        typedef struct __attribute__((objc_boxable)) _Rect Rect;
+    #endif
+
+    #if __has_feature(objc_boxed_nsvalue_expressions)
+        CABasicAnimation animation = [CABasicAnimation animationWithKeyPath:@"position"];
+        animation.fromValue = @(layer.position);
+        animation.toValue = @(newPosition);
+        [layer addAnimation:animation forKey:@"move"];
+    #else
+        CABasicAnimation animation = [CABasicAnimation animationWithKeyPath:@"position"];
+        animation.fromValue = [NSValue valueWithCGPoint:layer.position];
+        animation.toValue = [NSValue valueWithCGPoint:newPosition];
+        [layer addAnimation:animation forKey:@"move"];
+    #endif
+
 Code can use also ``__has_feature(objc_bool)`` to check for the
 availability of numeric literals support. This checks for the new
 ``__objc_yes / __objc_no`` keywords, which enable the use of
diff --git a/docs/SafeStack.rst b/docs/SafeStack.rst
index 5115d95..21e9b6c 100644
--- a/docs/SafeStack.rst
+++ b/docs/SafeStack.rst
@@ -15,14 +15,17 @@ the safe stack and the unsafe stack. The safe stack stores return addresses,
 register spills, and local variables that are always accessed in a safe way,
 while the unsafe stack stores everything else. This separation ensures that
 buffer overflows on the unsafe stack cannot be used to overwrite anything
-on the safe stack, which includes return addresses.
+on the safe stack.
+
+SafeStack is a part of the `Code-Pointer Integrity (CPI) Project
+<http://dslab.epfl.ch/proj/cpi/>`_.
 
 Performance
 -----------
 
 The performance overhead of the SafeStack instrumentation is less than 0.1% on
 average across a variety of benchmarks (see the `Code-Pointer Integrity
-<http://dslab.epfl.ch/pubs/cpi.pdf>`_ paper for details). This is mainly
+<http://dslab.epfl.ch/pubs/cpi.pdf>`__ paper for details). This is mainly
 because most small functions do not have any variables that require the unsafe
 stack and, hence, do not need unsafe stack frames to be created. The cost of
 creating unsafe stack frames for large functions is amortized by the cost of
@@ -34,37 +37,6 @@ used through multiple stack frames. Moving such objects away from the safe
 stack increases the locality of frequently accessed values on the stack, such
 as register spills, return addresses, and small local variables.
 
-Limitations
------------
-
-SafeStack has not been subjected to a comprehensive security review, and there
-exist known weaknesses, including but not limited to the following.
-
-In its current state, the separation of local variables provides protection
-against stack buffer overflows, but the safe stack itself is not protected
-from being corrupted through a pointer dereference. The Code-Pointer
-Integrity paper describes two ways in which we may protect the safe stack:
-hardware segmentation on the 32-bit x86 architecture or information hiding
-on other architectures.
-
-Even with information hiding, the safe stack would merely be hidden
-from attackers by being somewhere in the address space. Depending on the
-application, the address could be predictable even on 64-bit address spaces
-because not all the bits are addressable, multiple threads each have their
-stack, the application could leak the safe stack address to memory via
-``__builtin_frame_address``, bugs in the low-level runtime support etc.
-Safe stack leaks could be mitigated by writing and deploying a static binary
-analysis or a dynamic binary instrumentation based tool to find leaks.
-
-This approach doesn't prevent an attacker from "imbalancing" the safe
-stack by say having just one call, and doing two rets (thereby returning
-to an address that wasn't meant as a return address). This can be at least
-partially mitigated by deploying SafeStack alongside a forward control-flow
-integrity mechanism to ensure that calls are made using the correct calling
-convention. Clang does not currently implement a comprehensive forward
-control-flow integrity protection scheme; there exists one that protects
-:doc:`virtual calls <ControlFlowIntegrity>` but not non-virtual indirect calls.
-
 Compatibility
 -------------
 
@@ -83,21 +55,73 @@ work with SafeStack. One example is mark-and-sweep garbage collection
 implementations for C/C++ (e.g., Oilpan in chromium/blink), which must be
 changed to look for the live pointers on both safe and unsafe stacks.
 
-SafeStack supports linking together modules that are compiled with and without
-SafeStack, both statically and dynamically. One corner case that is not
-supported is using ``dlopen()`` to load a dynamic library that uses SafeStack into
-a program that is not compiled with SafeStack but uses threads.
+SafeStack supports linking statically modules that are compiled with and
+without SafeStack. An executable compiled with SafeStack can load dynamic
+libraries that are not compiled with SafeStack. At the moment, compiling
+dynamic libraries with SafeStack is not supported.
 
 Signal handlers that use ``sigaltstack()`` must not use the unsafe stack (see
 ``__attribute__((no_sanitize("safe-stack")))`` below).
 
 Programs that use APIs from ``ucontext.h`` are not supported yet.
 
+Security
+--------
+
+SafeStack protects return addresses, spilled registers and local variables that
+are always accessed in a safe way by separating them in a dedicated safe stack
+region. The safe stack is automatically protected against stack-based buffer
+overflows, since it is disjoint from the unsafe stack in memory, and it itself
+is always accessed in a safe way. In the current implementation, the safe stack
+is protected against arbitrary memory write vulnerabilities though
+randomization and information hiding: the safe stack is allocated at a random
+address and the instrumentation ensures that no pointers to the safe stack are
+ever stored outside of the safe stack itself (see limitations below).
+
+Known security limitations
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A complete protection against control-flow hijack attacks requires combining
+SafeStack with another mechanism that enforces the integrity of code pointers
+that are stored on the heap or the unsafe stack, such as `CPI
+<http://dslab.epfl.ch/proj/cpi/>`_, or a forward-edge control flow integrity
+mechanism that enforces correct calling conventions at indirect call sites,
+such as `IFCC <http://research.google.com/pubs/archive/42808.pdf>`_ with arity
+checks. Clang has control-flow integrity protection scheme for :doc:`C++ virtual
+calls <ControlFlowIntegrity>`, but not non-virtual indirect calls. With
+SafeStack alone, an attacker can overwrite a function pointer on the heap or
+the unsafe stack and cause a program to call arbitrary location, which in turn
+might enable stack pivoting and return-oriented programming.
+
+In its current implementation, SafeStack provides precise protection against
+stack-based buffer overflows, but protection against arbitrary memory write
+vulnerabilities is probabilistic and relies on randomization and information
+hiding. The randomization is currently based on system-enforced ASLR and shares
+its known security limitations. The safe stack pointer hiding is not perfect
+yet either: system library functions such as ``swapcontext``, exception
+handling mechanisms, intrinsics such as ``__builtin_frame_address``, or
+low-level bugs in runtime support could leak the safe stack pointer. In the
+future, such leaks could be detected by static or dynamic analysis tools and
+prevented by adjusting such functions to either encrypt the stack pointer when
+storing it in the heap (as already done e.g., by ``setjmp``/``longjmp``
+implementation in glibc), or store it in a safe region instead.
+
+The `CPI paper <http://dslab.epfl.ch/pubs/cpi.pdf>`_ describes two alternative,
+stronger safe stack protection mechanisms, that rely on software fault
+isolation, or hardware segmentation (as available on x86-32 and some x86-64
+CPUs).
+
+At the moment, SafeStack assumes that the compiler's implementation is correct.
+This has not been verified except through manual code inspection, and could
+always regress in the future. It's therefore desirable to have a separate
+static or dynamic binary verification tool that would check the correctness of
+the SafeStack instrumentation in final binaries.
+
 Usage
 =====
 
-To enable SafeStack, just pass ``-fsanitize=safe-stack`` flag to both compile and link
-command lines.
+To enable SafeStack, just pass ``-fsanitize=safe-stack`` flag to both compile
+and link command lines.
 
 Supported Platforms
 -------------------
@@ -129,10 +153,11 @@ function, even if enabled globally (see ``-fsanitize=safe-stack`` flag). This
 attribute may be required for functions that make assumptions about the
 exact layout of their stack frames.
 
-Care should be taken when using this attribute. The return address is not
-protected against stack buffer overflows, and it is easier to leak the
-address of the safe stack to memory by taking the address of a local variable.
-
+All local variables in functions with this attribute will be stored on the safe
+stack. The safe stack remains unprotected against memory errors when accessing
+these variables, so extra care must be taken to manually ensure that all such
+accesses are safe. Furthermore, the addresses of such local variables should
+never be stored on the heap, as it would leak the location of the SafeStack.
 
 ``__builtin___get_unsafe_stack_ptr()``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -149,15 +174,14 @@ current thread.
 Design
 ======
 
-Please refer to
-`http://dslab.epfl.ch/proj/cpi/ <http://dslab.epfl.ch/proj/cpi/>`_ for more
-information about the design of the SafeStack and its related technologies.
-
+Please refer to the `Code-Pointer Integrity <http://dslab.epfl.ch/proj/cpi/>`__
+project page for more information about the design of the SafeStack and its
+related technologies.
 
 Publications
 ------------
 
-`Code-Pointer Integrity <http://dslab.epfl.ch/pubs/cpi.pdf>`_.
+`Code-Pointer Integrity <http://dslab.epfl.ch/pubs/cpi.pdf>`__.
 Volodymyr Kuznetsov, Laszlo Szekeres, Mathias Payer, George Candea, R. Sekar, Dawn Song.
 USENIX Symposium on Operating Systems Design and Implementation
 (`OSDI <https://www.usenix.org/conference/osdi14>`_), Broomfield, CO, October 2014
diff --git a/docs/conf.py b/docs/conf.py
index 9030c2c..b7ed23a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -41,7 +41,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'Clang'
-copyright = u'2007-2014, The Clang Team'
+copyright = u'2007-2015, The Clang Team'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -212,10 +212,40 @@ latex_documents = [
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    ('index', 'clang', u'Clang Documentation',
-     [u'The Clang Team'], 1)
-]
+man_pages = []
+
+# Automatically derive the list of man pages from the contents of the command
+# guide subdirectory. This was copied from llvm/docs/conf.py.
+basedir = os.path.dirname(__file__)
+man_page_authors = u'Maintained by the Clang / LLVM Team (<http://clang.llvm.org>)'
+command_guide_subpath = 'CommandGuide'
+command_guide_path = os.path.join(basedir, command_guide_subpath)
+for name in os.listdir(command_guide_path):
+    # Ignore non-ReST files and the index page.
+    if not name.endswith('.rst') or name in ('index.rst',):
+        continue
+
+    # Otherwise, automatically extract the description.
+    file_subpath = os.path.join(command_guide_subpath, name)
+    with open(os.path.join(command_guide_path, name)) as f:
+        title = f.readline().rstrip('\n')
+        header = f.readline().rstrip('\n')
+
+        if len(header) != len(title):
+            print >>sys.stderr, (
+                "error: invalid header in %r (does not match title)" % (
+                    file_subpath,))
+        if ' - ' not in title:
+            print >>sys.stderr, (
+                ("error: invalid title in %r "
+                 "(expected '<name> - <description>')") % (
+                    file_subpath,))
+
+        # Split the name out of the title.
+        name,description = title.split(' - ', 1)
+        man_pages.append((file_subpath.replace('.rst',''), name,
+                          description, man_page_authors, 1))
+
 
 # If true, show URL addresses after external links.
 #man_show_urls = False
diff --git a/docs/index.rst b/docs/index.rst
index dec2bc8..d50667d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -32,6 +32,7 @@ Using Clang as a Compiler
    SafeStack
    Modules
    MSVCCompatibility
+   CommandGuide/index
    FAQ
 
 Using Clang as a Library
diff --git a/docs/tools/Makefile b/docs/tools/Makefile
deleted file mode 100644
index 5521d6b..0000000
--- a/docs/tools/Makefile
+++ /dev/null
@@ -1,116 +0,0 @@
-##===- docs/tools/Makefile ---------------------------------*- Makefile -*-===##
-# 
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-# 
-##===----------------------------------------------------------------------===##
-
-ifdef BUILD_FOR_WEBSITE
-
-# FIXME: This was copied from the CommandGuide makefile. Figure out
-# how to get this stuff on the website.
-
-# This special case is for keeping the CommandGuide on the LLVM web site
-# up to date automatically as the documents are checked in. It must build
-# the POD files to HTML only and keep them in the src directories. It must also
-# build in an unconfigured tree, hence the ifdef. To use this, run
-# make -s BUILD_FOR_WEBSITE=1 inside the cvs commit script.
-SRC_DOC_DIR=
-DST_HTML_DIR=html/
-DST_MAN_DIR=man/man1/
-DST_PS_DIR=ps/
-CLANG_VERSION := trunk
-
-# If we are in BUILD_FOR_WEBSITE mode, default to the all target.
-all:: html man ps
-
-clean:
-	rm -f pod2htm*.*~~ $(HTML) $(MAN) $(PS)
-
-# To create other directories, as needed, and timestamp their creation
-%/.dir:
-	-mkdir $* > /dev/null
-	date > $@
-
-else
-
-# Otherwise, if not in BUILD_FOR_WEBSITE mode, use the project info.
-CLANG_LEVEL := ../..
-include $(CLANG_LEVEL)/Makefile
-
-CLANG_VERSION := $(word 3,$(shell grep "CLANG_VERSION " \
-	$(PROJ_OBJ_DIR)/$(CLANG_LEVEL)/include/clang/Basic/Version.inc))
-
-SRC_DOC_DIR=$(PROJ_SRC_DIR)/
-DST_HTML_DIR=$(PROJ_OBJ_DIR)/
-DST_MAN_DIR=$(PROJ_OBJ_DIR)/
-DST_PS_DIR=$(PROJ_OBJ_DIR)/
-
-endif
-
-
-POD  := $(wildcard $(SRC_DOC_DIR)*.pod)
-HTML := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_HTML_DIR)%.html, $(POD))
-MAN  := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_MAN_DIR)%.1, $(POD))
-PS   := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_PS_DIR)%.ps, $(POD))
-
-ifdef ONLY_MAN_DOCS
-INSTALL_TARGETS := install-man
-else
-INSTALL_TARGETS := install-html install-man install-ps
-endif
-
-.SUFFIXES:
-.SUFFIXES: .html .pod .1 .ps
-
-$(DST_HTML_DIR)%.html: %.pod $(DST_HTML_DIR)/.dir
-	pod2html --css=manpage.css --htmlroot=. \
-	  --podpath=. --infile=$< --outfile=$@ --title=$*
-
-$(DST_MAN_DIR)%.1: %.pod $(DST_MAN_DIR)/.dir
-	pod2man --release "clang $(CLANG_VERSION)" --center="Clang Tools Documentation" $< $@
-
-$(DST_PS_DIR)%.ps: $(DST_MAN_DIR)%.1 $(DST_PS_DIR)/.dir
-	groff -Tps -man $< > $@
-
-
-html: $(HTML)
-man: $(MAN)
-ps: $(PS)
-
-EXTRA_DIST := $(POD)
-
-clean-local::
-	$(Verb) $(RM) -f pod2htm*.*~~ $(HTML) $(MAN) $(PS)
-
-HTML_DIR := $(DESTDIR)$(PROJ_docsdir)/html/clang
-MAN_DIR  := $(DESTDIR)$(PROJ_mandir)/man1
-PS_DIR   := $(DESTDIR)$(PROJ_docsdir)/ps
-
-install-html:: $(HTML)
-	$(Echo) Installing HTML Clang Tools Documentation
-	$(Verb) $(MKDIR) $(HTML_DIR)
-	$(Verb) $(DataInstall) $(HTML) $(HTML_DIR)
-	$(Verb) $(DataInstall) $(PROJ_SRC_DIR)/manpage.css $(HTML_DIR)
-
-install-man:: $(MAN)
-	$(Echo) Installing MAN Clang Tools Documentation
-	$(Verb) $(MKDIR) $(MAN_DIR)
-	$(Verb) $(DataInstall) $(MAN) $(MAN_DIR)
-
-install-ps:: $(PS)
-	$(Echo) Installing PS Clang Tools Documentation
-	$(Verb) $(MKDIR) $(PS_DIR)
-	$(Verb) $(DataInstall) $(PS) $(PS_DIR)
-
-install-local:: $(INSTALL_TARGETS)
-
-uninstall-local::
-	$(Echo) Uninstalling Clang Tools Documentation
-	$(Verb) $(RM) -rf $(HTML_DIR) $(MAN_DIR) $(PS_DIR)
-
-printvars::
-	$(Echo) "POD            : " '$(POD)'
-	$(Echo) "HTML           : " '$(HTML)'
diff --git a/docs/tools/clang.pod b/docs/tools/clang.pod
deleted file mode 100644
index 153f97b..0000000
--- a/docs/tools/clang.pod
+++ /dev/null
@@ -1,614 +0,0 @@
-=pod
-
-=head1 NAME
-
-clang - the Clang C, C++, and Objective-C compiler
-
-=head1 SYNOPSIS
-
-B<clang> [B<-c>|B<-S>|B<-E>] B<-std=>I<standard> B<-g>
-  [B<-O0>|B<-O1>|B<-O2>|B<-O3>|B<-Ofast>|B<-Os>|B<-Oz>|B<-O>|B<-O4>]
-  B<-W>I<warnings...> B<-pedantic>
-  B<-I>I<dir...> B<-L>I<dir...>
-  B<-D>I<macro[=defn]>
-  B<-f>I<feature-option...>
-  B<-m>I<machine-option...>
-  B<-o> I<output-file>
-  B<-stdlib=>I<library> 
-  I<input-filenames>
-
-=head1 DESCRIPTION
-
-B<clang> is a C, C++, and Objective-C compiler which encompasses preprocessing,
-parsing, optimization, code generation, assembly, and linking.  Depending on
-which high-level mode setting is passed, Clang will stop before doing a full
-link.  While Clang is highly integrated, it is important to understand the
-stages of compilation, to understand how to invoke it.  These stages are:
-
-=over
-
-=item B<Driver>
-
-The B<clang> executable is actually a small driver which controls the overall
-execution of other tools such as the compiler, assembler and linker.  Typically
-you do not need to interact with the driver, but you transparently use it to run
-the other tools.
-
-=item B<Preprocessing>
-
-This stage handles tokenization of the input source file, macro expansion,
-#include expansion and handling of other preprocessor directives.  The output of
-this stage is typically called a ".i" (for C), ".ii" (for C++), ".mi" (for 
-Objective-C), or ".mii" (for Objective-C++) file.
-
-=item B<Parsing and Semantic Analysis>
-
-This stage parses the input file, translating preprocessor tokens into a parse
-tree.  Once in the form of a parse tree, it applies semantic analysis to compute
-types for expressions as well and determine whether the code is well formed. This
-stage is responsible for generating most of the compiler warnings as well as
-parse errors.  The output of this stage is an "Abstract Syntax Tree" (AST).
-
-=item B<Code Generation and Optimization>
-
-This stage translates an AST into low-level intermediate code (known as "LLVM
-IR") and ultimately to machine code.  This phase is responsible for optimizing
-the generated code and handling target-specific code generation.  The output of
-this stage is typically called a ".s" file or "assembly" file.
-
-Clang also supports the use of an integrated assembler, in which the code
-generator produces object files directly. This avoids the overhead of generating
-the ".s" file and of calling the target assembler.
-
-=item B<Assembler>
-
-This stage runs the target assembler to translate the output of the compiler
-into a target object file.  The output of this stage is typically called a ".o"
-file or "object" file.
-
-=item B<Linker>
-
-This stage runs the target linker to merge multiple object files into an
-executable or dynamic library.  The output of this stage is typically called an
-"a.out", ".dylib" or ".so" file.
-
-=back
-
-The Clang compiler supports a large number of options to control each of these
-stages.  In addition to compilation of code, Clang also supports other tools:
-
-B<Clang Static Analyzer>
-
-The Clang Static Analyzer is a tool that scans source code to try to find bugs
-through code analysis.  This tool uses many parts of Clang and is built into the
-same driver.  Please see L<http://clang-analyzer.llvm.org> for more details
-on how to use the static analyzer.
-
-
-=head1 OPTIONS
-
-=head2 Stage Selection Options
-
-=over
-
-=item B<-E>
-
-Run the preprocessor stage.
-
-=item B<-fsyntax-only>
-
-Run the preprocessor, parser and type checking stages.
-
-=item B<-S>
-
-Run the previous stages as well as LLVM generation and optimization stages and
-target-specific code generation, producing an assembly file.
-
-=item B<-c>
-
-Run all of the above, plus the assembler, generating a target ".o" object file.
-
-=item B<no stage selection option>
-
-If no stage selection option is specified, all stages above are run, and the
-linker is run to combine the results into an executable or shared library.
-
-=back
-
-
-
-=head2 Language Selection and Mode Options
-
-=over
-
-=item B<-x> I<language>
-
-Treat subsequent input files as having type I<language>.
-
-=item B<-std>=I<language>
-
-Specify the language standard to compile for.
-
-=item B<-stdlib>=I<library>
-
-Specify the C++ standard library to use; supported options are libstdc++ and
-libc++.
-
-=item B<-ansi>
-
-Same as B<-std=c89>.
-
-=item B<-ObjC++>
-
-Treat source input files as Objective-C++ inputs.
-
-=item B<-ObjC>
-
-Treat source input files as Objective-C inputs.
-
-=item B<-trigraphs>
-
-Enable trigraphs.
-
-=item B<-ffreestanding>
-
-Indicate that the file should be compiled for a freestanding, not a hosted,
-environment.
-
-=item B<-fno-builtin>
-
-Disable special handling and optimizations of builtin functions like strlen and
-malloc.
-
-=item B<-fmath-errno>
-
-Indicate that math functions should be treated as updating errno.
-
-=item B<-fpascal-strings>
-
-Enable support for Pascal-style strings with "\pfoo".
-
-=item B<-fms-extensions>
-
-Enable support for Microsoft extensions.
-
-=item B<-fmsc-version=>
-
-Set _MSC_VER. Defaults to 1300 on Windows. Not set otherwise.
-
-=item B<-fborland-extensions>
-
-Enable support for Borland extensions.
-
-=item B<-fwritable-strings>
-
-Make all string literals default to writable.  This disables uniquing of
-strings and other optimizations.
-
-=item B<-flax-vector-conversions>
-
-Allow loose type checking rules for implicit vector conversions.
-
-=item B<-fblocks>
-
-Enable the "Blocks" language feature.
-
-=item B<-fobjc-gc-only>
-
-Indicate that Objective-C code should be compiled in GC-only mode, which only
-works when Objective-C Garbage Collection is enabled.
-
-=item B<-fobjc-gc>
-
-Indicate that Objective-C code should be compiled in hybrid-GC mode, which works
-with both GC and non-GC mode.
-
-=item B<-fobjc-abi-version>=I<version>
-
-Select the Objective-C ABI version to use. Available versions are 1 (legacy
-"fragile" ABI), 2 (non-fragile ABI 1), and 3 (non-fragile ABI 2).
-
-=item B<-fobjc-nonfragile-abi-version>=I<version>
-
-Select the Objective-C non-fragile ABI version to use by default. This will only
-be used as the Objective-C ABI when the non-fragile ABI is enabled (either via
--fobjc-nonfragile-abi, or because it is the platform default).
-
-=item B<-fobjc-nonfragile-abi>
-
-Enable use of the Objective-C non-fragile ABI. On platforms for which this is
-the default ABI, it can be disabled with B<-fno-objc-nonfragile-abi>.
-
-=back
-
-
-
-=head2 Target Selection Options
-
-Clang fully supports cross compilation as an inherent part of its design.
-Depending on how your version of Clang is configured, it may have support for
-a number of cross compilers, or may only support a native target.
-
-=over
-
-=item B<-arch> I<architecture>
-
-Specify the architecture to build for.
-
-=item B<-mmacosx-version-min>=I<version>
-
-When building for Mac OS X, specify the minimum version supported by your
-application.
-
-=item B<-miphoneos-version-min>
-
-When building for iPhone OS, specify the minimum version supported by your
-application.
-
-
-=item B<-march>=I<cpu>
-
-Specify that Clang should generate code for a specific processor family member
-and later.  For example, if you specify -march=i486, the compiler is allowed to
-generate instructions that are valid on i486 and later processors, but which
-may not exist on earlier ones.
-
-=back
-
-
-=head2 Code Generation Options
-
-=over
-
-=item B<-O0> B<-O1> B<-O2> B<-O3> B<-Ofast> B<-Os> B<-Oz> B<-O> B<-O4>
-
-Specify which optimization level to use:
-
-=over
-
-=item B<-O0>
-
-Means "no optimization": this level compiles the fastest and
-generates the most debuggable code.
-
-=item B<-O1>
-
-Somewhere between B<-O0> and B<-O2>.
-
-=item B<-O2>
-
-Moderate level of optimization which enables most optimizations.
-
-=item B<-O3>
-
-Like B<-O2>, except that it enables optimizations that take longer to perform
-or that may generate larger code (in an attempt to make the program run faster).
-
-=item B<-Ofast>
-
-Enables all the optimizations from B<-O3> along with other aggressive
-optimizations that may violate strict compliance with language standards.
-
-=item B<-Os>
-
-Like B<-O2> with extra optimizations to reduce code size.
-
-=item B<-Oz>
-
-Like B<-Os> (and thus B<-O2>), but reduces code size further.
-
-=item B<-O>
-
-Equivalent to B<-O2>.
-
-=item B<-O4> and higher
-
-Currently equivalent to B<-O3>
-
-=back
-
-=item B<-g>
-
-Generate debug information.  Note that Clang debug information works best at
-B<-O0>.
-
-=item B<-fstandalone-debug> B<-fno-standalone-debug>
-
-Clang supports a number of optimizations to reduce the size of debug
-information in the binary. They work based on the assumption that the
-debug type information can be spread out over multiple compilation
-units.  For instance, Clang will not emit type definitions for types
-that are not needed by a module and could be replaced with a forward
-declaration.  Further, Clang will only emit type info for a dynamic
-C++ class in the module that contains the vtable for the class.
-
-The B<-fstandalone-debug> option turns off these optimizations.  This
-is useful when working with 3rd-party libraries that don't come with
-debug information.  This is the default on Darwin.  Note that Clang
-will never emit type information for types that are not referenced at
-all by the program.
-
-=item B<-fexceptions>
-
-Enable generation of unwind information. This allows exceptions to be thrown
-through Clang compiled stack frames.  This is on by default in x86-64.
-
-=item B<-ftrapv>
-
-Generate code to catch integer overflow errors.  Signed integer overflow is
-undefined in C. With this flag, extra code is generated to detect this and abort
-when it happens.
-
-
-=item B<-fvisibility>
-
-This flag sets the default visibility level.
-
-=item B<-fcommon>
-
-This flag specifies that variables without initializers get common linkage.  It
-can be disabled with B<-fno-common>.
-
-=item B<-ftls-model>
-
-Set the default thread-local storage (TLS) model to use for thread-local
-variables. Valid values are: "global-dynamic", "local-dynamic", "initial-exec"
-and "local-exec". The default is "global-dynamic". The default model can be
-overridden with the tls_model attribute. The compiler will try to choose a more
-efficient model if possible.
-
-=item B<-flto> B<-emit-llvm>
-
-Generate output files in LLVM formats, suitable for link time optimization. When
-used with B<-S> this generates LLVM intermediate language assembly files,
-otherwise this generates LLVM bitcode format object files (which may be passed
-to the linker depending on the stage selection options).
-
-=cut
-
-##=item B<-fnext-runtime> B<-fobjc-nonfragile-abi> B<-fgnu-runtime>
-##These options specify which Objective-C runtime the code generator should
-##target.  FIXME: we don't want people poking these generally.
-
-=pod
-
-=back
-
-
-=head2 Driver Options
-
-=over
-
-=item B<-###>
-
-Print (but do not run) the commands to run for this compilation.
-
-=item B<--help>
-
-Display available options.
-
-=item B<-Qunused-arguments>
-
-Do not emit any warnings for unused driver arguments.
-
-=item B<-Wa,>I<args>
-
-Pass the comma separated arguments in I<args> to the assembler.
-
-=item B<-Wl,>I<args>
-
-Pass the comma separated arguments in I<args> to the linker.
-
-=item B<-Wp,>I<args>
-
-Pass the comma separated arguments in I<args> to the preprocessor.
-
-=item B<-Xanalyzer> I<arg>
-
-Pass I<arg> to the static analyzer.
-
-=item B<-Xassembler> I<arg>
-
-Pass I<arg> to the assembler.
-
-=item B<-Xlinker> I<arg>
-
-Pass I<arg> to the linker.
-
-=item B<-Xpreprocessor> I<arg>
-
-Pass I<arg> to the preprocessor.
-
-=item B<-o> I<file>
-
-Write output to I<file>.
-
-=item B<-print-file-name>=I<file>
-
-Print the full library path of I<file>.
-
-=item B<-print-libgcc-file-name>
-
-Print the library path for "libgcc.a".
-
-=item B<-print-prog-name>=I<name>
-
-Print the full program path of I<name>.
-
-=item B<-print-search-dirs>
-
-Print the paths used for finding libraries and programs.
-
-=item B<-save-temps>
-
-Save intermediate compilation results.
-
-=item B<-integrated-as> B<-no-integrated-as>
-
-Used to enable and disable, respectively, the use of the integrated
-assembler. Whether the integrated assembler is on by default is target
-dependent.
-
-=item B<-time>
-
-Time individual commands.
-
-=item B<-ftime-report>
-
-Print timing summary of each stage of compilation.
-
-=item B<-v>
-
-Show commands to run and use verbose output.
-
-=back
-
-
-=head2 Diagnostics Options
-
-=over
-
-=item B<-fshow-column>
-B<-fshow-source-location>
-B<-fcaret-diagnostics>
-B<-fdiagnostics-fixit-info>
-B<-fdiagnostics-parseable-fixits>
-B<-fdiagnostics-print-source-range-info>
-B<-fprint-source-range-info>
-B<-fdiagnostics-show-option>
-B<-fmessage-length>
-
-These options control how Clang prints out information about diagnostics (errors
-and warnings).  Please see the Clang User's Manual for more information.
-
-=back
-
-
-=head2 Preprocessor Options
-
-=over
-
-=item B<-D>I<macroname=value>
-
-Adds an implicit #define into the predefines buffer which is read before the
-source file is preprocessed.
-
-=item B<-U>I<macroname>
-
-Adds an implicit #undef into the predefines buffer which is read before the
-source file is preprocessed.
-
-=item B<-include> I<filename>
-
-Adds an implicit #include into the predefines buffer which is read before the
-source file is preprocessed.
-
-=item B<-I>I<directory>
-
-Add the specified directory to the search path for include files.
-
-=item B<-F>I<directory>
-
-Add the specified directory to the search path for framework include files.
-
-=item B<-nostdinc>
-
-Do not search the standard system directories or compiler builtin directories
-for include files.
-
-=item B<-nostdlibinc>
-
-Do not search the standard system directories for include files, but do search
-compiler builtin include directories.
-
-=item B<-nobuiltininc>
-
-Do not search clang's builtin directory for include files.
-
-=cut
-
-## TODO, but do we really want people using this stuff?
-#=item B<-idirafter>I<directory>
-#=item B<-iquote>I<directory>
-#=item B<-isystem>I<directory>
-#=item B<-iprefix>I<directory>
-#=item B<-iwithprefix>I<directory>
-#=item B<-iwithprefixbefore>I<directory>
-#=item B<-isysroot>
-
-=pod
-
-
-=back
-
-
-
-=cut
-
-### TODO someday.
-#=head2 Warning Control Options
-#=over
-#=back
-#=head2 Code Generation and Optimization Options
-#=over
-#=back
-#=head2 Assembler Options
-#=over
-#=back
-#=head2 Linker Options
-#=over
-#=back
-#=head2 Static Analyzer Options
-#=over
-#=back
-
-=pod
-
-
-=head1 ENVIRONMENT
-
-=over
-
-=item B<TMPDIR>, B<TEMP>, B<TMP>
-
-These environment variables are checked, in order, for the location to
-write temporary files used during the compilation process.
-
-=item B<CPATH>
-
-If this environment variable is present, it is treated as a delimited
-list of paths to be added to the default system include path list. The
-delimiter is the platform dependent delimiter, as used in the I<PATH>
-environment variable.
-
-Empty components in the environment variable are ignored.
-
-=item B<C_INCLUDE_PATH>, B<OBJC_INCLUDE_PATH>, B<CPLUS_INCLUDE_PATH>,
-B<OBJCPLUS_INCLUDE_PATH>
-
-These environment variables specify additional paths, as for CPATH,
-which are only used when processing the appropriate language.
-
-=item B<MACOSX_DEPLOYMENT_TARGET>
-
-If -mmacosx-version-min is unspecified, the default deployment target
-is read from this environment variable.  This option only affects Darwin
-targets.
-
-=back
-
-=head1 BUGS
-
-To report bugs, please visit L<http://llvm.org/bugs/>.  Most bug reports should
-include preprocessed source files (use the B<-E> option) and the full output of 
-the compiler, along with information to reproduce.
-
-=head1 SEE ALSO
-
- as(1), ld(1)
-
-=head1 AUTHOR
-
-Maintained by the Clang / LLVM Team (L<http://clang.llvm.org>).
-
-=cut
diff --git a/docs/tools/manpage.css b/docs/tools/manpage.css
deleted file mode 100644
index c922564..0000000
--- a/docs/tools/manpage.css
+++ /dev/null
@@ -1,256 +0,0 @@
-/* Based on http://www.perldoc.com/css/perldoc.css */
-
-@import url("../llvm.css");
-
-body { font-family: Arial,Helvetica; }
-
-blockquote { margin: 10pt;  }
-
-h1, a { color: #336699; }
-
-
-/*** Top menu style ****/
-.mmenuon { 
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #ff6600; font-size: 10pt;
-}
-.mmenuoff { 
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #ffffff; font-size: 10pt;
-}	  
-.cpyright {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #ffffff; font-size: xx-small;
-}
-.cpyrightText {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #ffffff; font-size: xx-small;
-}
-.sections { 
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: 11pt;
-}	 
-.dsections { 
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: 12pt;
-}	
-.slink { 
- font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
- color: #000000; font-size: 9pt;
-}	 
-
-.slink2 { font-family: Arial,Helvetica; text-decoration: none; color: #336699; }	 
-
-.maintitle { 
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: 18pt;
-}	 
-.dblArrow {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: small;
-}
-.menuSec {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: small;
-}
-
-.newstext {
- font-family: Arial,Helvetica; font-size: small;
-}
-
-.linkmenu {
- font-family: Arial,Helvetica; color: #000000; font-weight: bold;
- text-decoration: none;
-}
-
-P {
- font-family: Arial,Helvetica;
-}
-
-PRE {
-    font-size: 10pt;
-}
-.quote { 
- font-family: Times; text-decoration: none;
- color: #000000; font-size: 9pt; font-style: italic;
-}	
-.smstd { font-family: Arial,Helvetica; color: #000000; font-size: x-small; } 
-.std { font-family: Arial,Helvetica; color: #000000; } 
-.meerkatTitle { 
- font-family: sans-serif; font-size: x-small;  color: black;    }
-
-.meerkatDescription { font-family: sans-serif; font-size: 10pt; color: black }
-.meerkatCategory { 
- font-family: sans-serif; font-size: 9pt; font-weight: bold; font-style: italic; 
- color: brown; }
-.meerkatChannel { 
- font-family: sans-serif; font-size: 9pt; font-style: italic; color: brown; }
-.meerkatDate { font-family: sans-serif; font-size: xx-small; color: #336699; }
-
-.tocTitle {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #333333; font-size: 10pt;
-}
-
-.toc-item {
- font-family: Arial,Helvetica; font-weight: bold; 
- color: #336699; font-size: 10pt; text-decoration: underline;
-}
-
-.perlVersion {
- font-family: Arial,Helvetica; font-weight: bold; 
- color: #336699; font-size: 10pt; text-decoration: none;
-}
-
-.podTitle {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #000000;
-}
-
-.docTitle {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #000000; font-size: 10pt;
-}
-.dotDot {
- font-family: Arial,Helvetica; font-weight: bold; 
- color: #000000; font-size: 9pt;
-}
-
-.docSec {
- font-family: Arial,Helvetica; font-weight: normal; 
- color: #333333; font-size: 9pt;
-}
-.docVersion {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: 10pt;
-}
-
-.docSecs-on {
- font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
- color: #ff0000; font-size: 10pt;
-}
-.docSecs-off {
- font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
- color: #333333; font-size: 10pt;
-}
-
-h2 {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: medium;
-}
-h1 {
- font-family: Verdana,Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: large;
-}
-
-DL {
- font-family: Arial,Helvetica; font-weight: normal; text-decoration: none;
- color: #333333; font-size: 10pt;
-}
-
-UL > LI > A {
- font-family: Arial,Helvetica; font-weight: bold;
- color: #336699; font-size: 10pt;
-}
-
-.moduleInfo {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #333333; font-size: 11pt;
-}
-
-.moduleInfoSec {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none;
- color: #336699; font-size: 10pt;
-}
-
-.moduleInfoVal {
- font-family: Arial,Helvetica; font-weight: normal; text-decoration: underline;
- color: #000000; font-size: 10pt;
-}
-
-.cpanNavTitle {
- font-family: Arial,Helvetica; font-weight: bold; 
- color: #ffffff; font-size: 10pt;
-}
-.cpanNavLetter {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; 
- color: #333333; font-size: 9pt;
-}
-.cpanCat {
- font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; 
- color: #336699; font-size: 9pt;
-}
-
-.bttndrkblue-bkgd-top {
-	background-color: #225688;
-	background-image: url(/global/mvc_objects/images/bttndrkblue_bgtop.gif);
-}
-.bttndrkblue-bkgd-left {
-	background-color: #225688;
-	background-image: url(/global/mvc_objects/images/bttndrkblue_bgleft.gif);
-}
-.bttndrkblue-bkgd {
-	padding-top: 0px;
-	padding-bottom: 0px;
-	margin-bottom: 0px;
-	margin-top: 0px;
-	background-repeat: no-repeat;
-	background-color: #225688;
-	background-image: url(/global/mvc_objects/images/bttndrkblue_bgmiddle.gif);
-	vertical-align: top;
-}
-.bttndrkblue-bkgd-right {
-	background-color: #225688;
-	background-image: url(/global/mvc_objects/images/bttndrkblue_bgright.gif);
-}
-.bttndrkblue-bkgd-bottom {
-	background-color: #225688;
-	background-image: url(/global/mvc_objects/images/bttndrkblue_bgbottom.gif);
-}
-.bttndrkblue-text a {
-	color: #ffffff;
-	text-decoration: none;
-}
-a.bttndrkblue-text:hover {
-	color: #ffDD3C;
-	text-decoration: none;
-}
-.bg-ltblue {
-	background-color: #f0f5fa;
-} 
-
-.border-left-b {
-	background: #f0f5fa url(/i/corner-leftline.gif) repeat-y;
-} 
-
-.border-right-b {
-	background: #f0f5fa url(/i/corner-rightline.gif) repeat-y;
-} 
-
-.border-top-b {
-	background: #f0f5fa url(/i/corner-topline.gif) repeat-x;
-} 
-
-.border-bottom-b {
-	background: #f0f5fa url(/i/corner-botline.gif) repeat-x;
-} 
-
-.border-right-w {
-	background: #ffffff url(/i/corner-rightline.gif) repeat-y;
-} 
-
-.border-top-w {
-	background: #ffffff url(/i/corner-topline.gif) repeat-x;
-} 
-
-.border-bottom-w {
-	background: #ffffff url(/i/corner-botline.gif) repeat-x;
-} 
-
-.bg-white {
-	background-color: #ffffff;
-} 
-
-.border-left-w {
-	background: #ffffff url(/i/corner-leftline.gif) repeat-y;
-} 
diff --git a/examples/analyzer-plugin/MainCallChecker.cpp b/examples/analyzer-plugin/MainCallChecker.cpp
index 2ad8c84..a6f69fd 100644
--- a/examples/analyzer-plugin/MainCallChecker.cpp
+++ b/examples/analyzer-plugin/MainCallChecker.cpp
@@ -37,9 +37,10 @@ void MainCallChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const
     if (!BT)
       BT.reset(new BugType(this, "call to main", "example analyzer plugin"));
 
-    BugReport *report = new BugReport(*BT, BT->getName(), N);
+    std::unique_ptr<BugReport> report =
+        llvm::make_unique<BugReport>(*BT, BT->getName(), N);
     report->addRange(Callee->getSourceRange());
-    C.emitReport(report);
+    C.emitReport(std::move(report));
   }
 }
 
diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h
index a224180..fad9cfa 100644
--- a/include/clang-c/Index.h
+++ b/include/clang-c/Index.h
@@ -2225,12 +2225,19 @@ enum CXCursorKind {
    */
   CXCursor_OMPTeamsDirective             = 253,
 
-  /** \brief OpenMP taskwait directive.
+  /** \brief OpenMP taskgroup directive.
    */
   CXCursor_OMPTaskgroupDirective          = 254,
 
+  /** \brief OpenMP cancellation point directive.
+   */
+  CXCursor_OMPCancellationPointDirective  = 255,
+
+  /** \brief OpenMP cancel directive.
+   */
+  CXCursor_OMPCancelDirective             = 256,
 
-  CXCursor_LastStmt                      = CXCursor_OMPTaskgroupDirective,
+  CXCursor_LastStmt                    = CXCursor_OMPCancelDirective,
 
   /**
    * \brief Cursor that represents the translation unit itself.
diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h
index da288c4..682be3d 100644
--- a/include/clang/AST/ASTContext.h
+++ b/include/clang/AST/ASTContext.h
@@ -1664,6 +1664,9 @@ public:
   TypeInfo getTypeInfo(const Type *T) const;
   TypeInfo getTypeInfo(QualType T) const { return getTypeInfo(T.getTypePtr()); }
 
+  /// \brief Get default simd alignment of the specified complete type in bits.
+  unsigned getOpenMPDefaultSimdAlign(QualType T) const;
+
   /// \brief Return the size of the specified (complete) type \p T, in bits.
   uint64_t getTypeSize(QualType T) const { return getTypeInfo(T).Width; }
   uint64_t getTypeSize(const Type *T) const { return getTypeInfo(T).Width; }
@@ -1780,6 +1783,17 @@ public:
   /// \param method should be the declaration from the class definition
   void setNonKeyFunction(const CXXMethodDecl *method);
 
+  /// Loading virtual member pointers using the virtual inheritance model
+  /// always results in an adjustment using the vbtable even if the index is
+  /// zero.
+  ///
+  /// This is usually OK because the first slot in the vbtable points
+  /// backwards to the top of the MDC.  However, the MDC might be reusing a
+  /// vbptr from an nv-base.  In this case, the first slot in the vbtable
+  /// points to the start of the nv-base which introduced the vbptr and *not*
+  /// the MDC.  Modify the NonVirtualBaseAdjustment to account for this.
+  CharUnits getOffsetOfBaseWithVBPtr(const CXXRecordDecl *RD) const;
+
   /// Get the offset of a FieldDecl or IndirectFieldDecl, in bits.
   uint64_t getFieldOffset(const ValueDecl *FD) const;
 
diff --git a/include/clang/AST/ASTMutationListener.h b/include/clang/AST/ASTMutationListener.h
index 4f3acc3..f4026e9 100644
--- a/include/clang/AST/ASTMutationListener.h
+++ b/include/clang/AST/ASTMutationListener.h
@@ -14,6 +14,7 @@
 #define LLVM_CLANG_AST_ASTMUTATIONLISTENER_H
 
 namespace clang {
+  class Attr;
   class ClassTemplateDecl;
   class ClassTemplateSpecializationDecl;
   class CXXDestructorDecl;
@@ -29,6 +30,7 @@ namespace clang {
   class ObjCInterfaceDecl;
   class ObjCPropertyDecl;
   class QualType;
+  class RecordDecl;
   class TagDecl;
   class VarDecl;
   class VarTemplateDecl;
@@ -119,6 +121,14 @@ public:
   /// \param M The containing module in which the definition was made visible,
   ///        if any.
   virtual void RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {}
+  
+  /// \brief An attribute was added to a RecordDecl
+  ///
+  /// \param Attr The attribute that was added to the Record
+  ///
+  /// \param Record The RecordDecl that got a new attribute
+  virtual void AddedAttributeToRecord(const Attr *Attr, 
+                                      const RecordDecl *Record) {}
 
   // NOTE: If new methods are added they should also be added to
   // MultiplexASTMutationListener.
diff --git a/include/clang/AST/DataRecursiveASTVisitor.h b/include/clang/AST/DataRecursiveASTVisitor.h
index ef88176..923d98f 100644
--- a/include/clang/AST/DataRecursiveASTVisitor.h
+++ b/include/clang/AST/DataRecursiveASTVisitor.h
@@ -1861,8 +1861,8 @@ DEF_TRAVERSE_DECL(ParmVarDecl, {
     TRY_TO(WalkUpFrom##STMT(S));                                               \
     StmtQueueAction StmtQueue(*this);                                          \
     { CODE; }                                                                  \
-    for (Stmt::child_range range = S->children(); range; ++range) {            \
-      StmtQueue.queue(*range);                                                 \
+    for (Stmt *SubStmt : S->children()) {                                      \
+      StmtQueue.queue(SubStmt);                                                \
     }                                                                          \
     return true;                                                               \
   }
@@ -2011,8 +2011,8 @@ bool RecursiveASTVisitor<Derived>::TraverseInitListExpr(InitListExpr *S) {
   TRY_TO(WalkUpFromInitListExpr(S));
   StmtQueueAction StmtQueue(*this);
   // All we need are the default actions.  FIXME: use a helper function.
-  for (Stmt::child_range range = S->children(); range; ++range) {
-    StmtQueue.queue(*range);
+  for (Stmt *SubStmt : S->children()) {
+    StmtQueue.queue(SubStmt);
   }
   return true;
 }
@@ -2358,6 +2358,12 @@ DEF_TRAVERSE_STMT(OMPTaskwaitDirective,
 DEF_TRAVERSE_STMT(OMPTaskgroupDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPCancellationPointDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
+DEF_TRAVERSE_STMT(OMPCancelDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPFlushDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
@@ -2622,6 +2628,12 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFlushClause(OMPFlushClause *C) {
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPDependClause(OMPDependClause *C) {
+  TRY_TO(VisitOMPClauseList(C));
+  return true;
+}
+
 // FIXME: look at the following tricky-seeming exprs to see if we
 // need to recurse on anything.  These are ones that have methods
 // returning decls or qualtypes or nestednamespecifier -- though I'm
diff --git a/include/clang/AST/EvaluatedExprVisitor.h b/include/clang/AST/EvaluatedExprVisitor.h
index 5cae5d9..ad52873 100644
--- a/include/clang/AST/EvaluatedExprVisitor.h
+++ b/include/clang/AST/EvaluatedExprVisitor.h
@@ -98,9 +98,9 @@ public:
   /// \brief The basis case walks all of the children of the statement or
   /// expression, assuming they are all potentially evaluated.
   void VisitStmt(PTR(Stmt) S) {
-    for (auto C = S->children(); C; ++C)
-      if (*C)
-        this->Visit(*C);
+    for (auto *SubStmt : S->children())
+      if (SubStmt)
+        this->Visit(SubStmt);
   }
 
 #undef PTR
diff --git a/include/clang/AST/ExprObjC.h b/include/clang/AST/ExprObjC.h
index f296e8f..899e648 100644
--- a/include/clang/AST/ExprObjC.h
+++ b/include/clang/AST/ExprObjC.h
@@ -86,7 +86,7 @@ public:
 };
 
 /// ObjCBoxedExpr - used for generalized expression boxing.
-/// as in: @(strdup("hello world")) or @(random())
+/// as in: @(strdup("hello world")), @(random()) or @(view.frame)
 /// Also used for boxing non-parenthesized numeric literals;
 /// as in: @42 or \@true (c++/objc++) or \@__yes (c/objc).
 class ObjCBoxedExpr : public Expr {
diff --git a/include/clang/AST/ExternalASTSource.h b/include/clang/AST/ExternalASTSource.h
index 9a76080..08c2e0c 100644
--- a/include/clang/AST/ExternalASTSource.h
+++ b/include/clang/AST/ExternalASTSource.h
@@ -156,6 +156,20 @@ public:
   /// \brief Retrieve the module that corresponds to the given module ID.
   virtual Module *getModule(unsigned ID) { return nullptr; }
 
+  /// \brief Holds everything needed to generate debug info for an
+  /// imported module or precompiled header file.
+  struct ASTSourceDescriptor {
+    std::string ModuleName;
+    std::string Path;
+    std::string ASTFile;
+    uint64_t Signature;
+  };
+
+  /// \brief Return a descriptor for the corresponding module, if one exists.
+  virtual llvm::Optional<ASTSourceDescriptor> getSourceDescriptor(unsigned ID);
+  /// \brief Return a descriptor for the module.
+  virtual ASTSourceDescriptor getSourceDescriptor(const Module &M);
+
   /// \brief Finds all declarations lexically contained within the given
   /// DeclContext, after applying an optional filter predicate.
   ///
diff --git a/include/clang/AST/Mangle.h b/include/clang/AST/Mangle.h
index c5a7ea1..735ae11 100644
--- a/include/clang/AST/Mangle.h
+++ b/include/clang/AST/Mangle.h
@@ -204,6 +204,10 @@ public:
   virtual void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD,
                                         raw_ostream &) = 0;
 
+  virtual void mangleCXXVirtualDisplacementMap(const CXXRecordDecl *SrcRD,
+                                               const CXXRecordDecl *DstRD,
+                                               raw_ostream &Out) = 0;
+
   virtual void mangleCXXThrowInfo(QualType T, bool IsConst, bool IsVolatile,
                                   uint32_t NumEntries, raw_ostream &Out) = 0;
 
diff --git a/include/clang/AST/NSAPI.h b/include/clang/AST/NSAPI.h
index fc994c1..ce2c7ce 100644
--- a/include/clang/AST/NSAPI.h
+++ b/include/clang/AST/NSAPI.h
@@ -37,8 +37,9 @@ public:
     ClassId_NSMutableSet,
     ClassId_NSCountedSet,
     ClassId_NSMutableOrderedSet,
+    ClassId_NSValue
   };
-  static const unsigned NumClassIds = 10;
+  static const unsigned NumClassIds = 11;
 
   enum NSStringMethodKind {
     NSStr_stringWithString,
diff --git a/include/clang/AST/OpenMPClause.h b/include/clang/AST/OpenMPClause.h
index 386c8dd..fcfa1dd 100644
--- a/include/clang/AST/OpenMPClause.h
+++ b/include/clang/AST/OpenMPClause.h
@@ -2212,6 +2212,94 @@ public:
   }
 };
 
+/// \brief This represents implicit clause 'depend' for the '#pragma omp task'
+/// directive.
+///
+/// \code
+/// #pragma omp task depend(in:a,b)
+/// \endcode
+/// In this example directive '#pragma omp task' with clause 'depend' with the
+/// variables 'a' and 'b' with dependency 'in'.
+///
+class OMPDependClause : public OMPVarListClause<OMPDependClause> {
+  friend class OMPClauseReader;
+  /// \brief Dependency type (one of in, out, inout).
+  OpenMPDependClauseKind DepKind;
+  /// \brief Dependency type location.
+  SourceLocation DepLoc;
+  /// \brief Colon location.
+  SourceLocation ColonLoc;
+  /// \brief Build clause with number of variables \a N.
+  ///
+  /// \param StartLoc Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc Ending location of the clause.
+  /// \param N Number of the variables in the clause.
+  ///
+  OMPDependClause(SourceLocation StartLoc, SourceLocation LParenLoc,
+                  SourceLocation EndLoc, unsigned N)
+      : OMPVarListClause<OMPDependClause>(OMPC_depend, StartLoc, LParenLoc,
+                                          EndLoc, N),
+        DepKind(OMPC_DEPEND_unknown) {}
+
+  /// \brief Build an empty clause.
+  ///
+  /// \param N Number of variables.
+  ///
+  explicit OMPDependClause(unsigned N)
+      : OMPVarListClause<OMPDependClause>(OMPC_depend, SourceLocation(),
+                                          SourceLocation(), SourceLocation(),
+                                          N),
+        DepKind(OMPC_DEPEND_unknown) {}
+  /// \brief Set dependency kind.
+  void setDependencyKind(OpenMPDependClauseKind K) { DepKind = K; }
+
+  /// \brief Set dependency kind and its location.
+  void setDependencyLoc(SourceLocation Loc) { DepLoc = Loc; }
+
+  /// \brief Set colon location.
+  void setColonLoc(SourceLocation Loc) { ColonLoc = Loc; }
+
+public:
+  /// \brief Creates clause with a list of variables \a VL.
+  ///
+  /// \param C AST context.
+  /// \param StartLoc Starting location of the clause.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc Ending location of the clause.
+  /// \param DepKind Dependency type.
+  /// \param DepLoc Location of the dependency type.
+  /// \param ColonLoc Colon location.
+  /// \param VL List of references to the variables.
+  ///
+  static OMPDependClause *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc,
+         SourceLocation EndLoc, OpenMPDependClauseKind DepKind,
+         SourceLocation DepLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VL);
+  /// \brief Creates an empty clause with \a N variables.
+  ///
+  /// \param C AST context.
+  /// \param N The number of variables.
+  ///
+  static OMPDependClause *CreateEmpty(const ASTContext &C, unsigned N);
+
+  /// \brief Get dependency type.
+  OpenMPDependClauseKind getDependencyKind() const { return DepKind; }
+  /// \brief Get dependency type location.
+  SourceLocation getDependencyLoc() const { return DepLoc; }
+  /// \brief Get colon location.
+  SourceLocation getColonLoc() const { return ColonLoc; }
+
+  StmtRange children() {
+    return StmtRange(reinterpret_cast<Stmt **>(varlist_begin()),
+                     reinterpret_cast<Stmt **>(varlist_end()));
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == OMPC_depend;
+  }
+};
+
 } // end namespace clang
 
 #endif
diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h
index 95d7730..b118503 100644
--- a/include/clang/AST/RecursiveASTVisitor.h
+++ b/include/clang/AST/RecursiveASTVisitor.h
@@ -1881,8 +1881,8 @@ DEF_TRAVERSE_DECL(ParmVarDecl, {
   bool RecursiveASTVisitor<Derived>::Traverse##STMT(STMT *S) {                 \
     TRY_TO(WalkUpFrom##STMT(S));                                               \
     { CODE; }                                                                  \
-    for (Stmt::child_range range = S->children(); range; ++range) {            \
-      TRY_TO(TraverseStmt(*range));                                            \
+    for (Stmt *SubStmt : S->children()) {                                      \
+      TRY_TO(TraverseStmt(SubStmt));                                           \
     }                                                                          \
     return true;                                                               \
   }
@@ -2038,15 +2038,15 @@ bool RecursiveASTVisitor<Derived>::TraverseInitListExpr(InitListExpr *S) {
   if (Syn) {
     TRY_TO(WalkUpFromInitListExpr(Syn));
     // All we need are the default actions.  FIXME: use a helper function.
-    for (Stmt::child_range range = Syn->children(); range; ++range) {
-      TRY_TO(TraverseStmt(*range));
+    for (Stmt *SubStmt : Syn->children()) {
+      TRY_TO(TraverseStmt(SubStmt));
     }
   }
   InitListExpr *Sem = S->isSemanticForm() ? S : S->getSemanticForm();
   if (Sem) {
     TRY_TO(WalkUpFromInitListExpr(Sem));
-    for (Stmt::child_range range = Sem->children(); range; ++range) {
-      TRY_TO(TraverseStmt(*range));
+    for (Stmt *SubStmt : Sem->children()) {
+      TRY_TO(TraverseStmt(SubStmt));
     }
   }
   return true;
@@ -2391,6 +2391,12 @@ DEF_TRAVERSE_STMT(OMPTaskwaitDirective,
 DEF_TRAVERSE_STMT(OMPTaskgroupDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPCancellationPointDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
+DEF_TRAVERSE_STMT(OMPCancelDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPFlushDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
@@ -2655,6 +2661,12 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFlushClause(OMPFlushClause *C) {
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPDependClause(OMPDependClause *C) {
+  TRY_TO(VisitOMPClauseList(C));
+  return true;
+}
+
 // FIXME: look at the following tricky-seeming exprs to see if we
 // need to recurse on anything.  These are ones that have methods
 // returning decls or qualtypes or nestednamespecifier -- though I'm
diff --git a/include/clang/AST/StmtIterator.h b/include/clang/AST/StmtIterator.h
index ec7329a..a5a57af 100644
--- a/include/clang/AST/StmtIterator.h
+++ b/include/clang/AST/StmtIterator.h
@@ -32,8 +32,10 @@ protected:
   enum { StmtMode = 0x0, SizeOfTypeVAMode = 0x1, DeclGroupMode = 0x2,
          Flags = 0x3 };
   
-  Stmt **stmt;
-  Decl **DGI;
+  union {
+    Stmt **stmt;
+    Decl **DGI;
+  };
   uintptr_t RawVAPtr;
   Decl **DGE;
   
@@ -64,10 +66,10 @@ protected:
 
   Stmt*& GetDeclExpr() const;
 
-  StmtIteratorBase(Stmt **s) : stmt(s), DGI(nullptr), RawVAPtr(0) {}
+  StmtIteratorBase(Stmt **s) : stmt(s), RawVAPtr(0) {}
   StmtIteratorBase(const VariableArrayType *t);
   StmtIteratorBase(Decl **dgi, Decl **dge);
-  StmtIteratorBase() : stmt(nullptr), DGI(nullptr), RawVAPtr(0) {}
+  StmtIteratorBase() : stmt(nullptr), RawVAPtr(0) {}
 };
 
 
diff --git a/include/clang/AST/StmtOpenMP.h b/include/clang/AST/StmtOpenMP.h
index 63f295d..b412daa 100644
--- a/include/clang/AST/StmtOpenMP.h
+++ b/include/clang/AST/StmtOpenMP.h
@@ -1856,6 +1856,121 @@ public:
   }
 };
 
+/// \brief This represents '#pragma omp cancellation point' directive.
+///
+/// \code
+/// #pragma omp cancellation point for
+/// \endcode
+///
+/// In this example a cancellation point is created for innermost 'for' region.
+class OMPCancellationPointDirective : public OMPExecutableDirective {
+  friend class ASTStmtReader;
+  OpenMPDirectiveKind CancelRegion;
+  /// \brief Build directive with the given start and end location.
+  ///
+  /// \param StartLoc Starting location of the directive kind.
+  /// \param EndLoc Ending location of the directive.
+  ///
+  OMPCancellationPointDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+      : OMPExecutableDirective(this, OMPCancellationPointDirectiveClass,
+                               OMPD_cancellation_point, StartLoc, EndLoc, 0, 0),
+        CancelRegion(OMPD_unknown) {}
+
+  /// \brief Build an empty directive.
+  ///
+  explicit OMPCancellationPointDirective()
+      : OMPExecutableDirective(this, OMPCancellationPointDirectiveClass,
+                               OMPD_cancellation_point, SourceLocation(),
+                               SourceLocation(), 0, 0),
+        CancelRegion(OMPD_unknown) {}
+
+  /// \brief Set cancel region for current cancellation point.
+  /// \param CR Cancellation region.
+  void setCancelRegion(OpenMPDirectiveKind CR) { CancelRegion = CR; }
+
+public:
+  /// \brief Creates directive.
+  ///
+  /// \param C AST context.
+  /// \param StartLoc Starting location of the directive kind.
+  /// \param EndLoc Ending Location of the directive.
+  ///
+  static OMPCancellationPointDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         OpenMPDirectiveKind CancelRegion);
+
+  /// \brief Creates an empty directive.
+  ///
+  /// \param C AST context.
+  ///
+  static OMPCancellationPointDirective *CreateEmpty(const ASTContext &C,
+                                                    EmptyShell);
+
+  /// \brief Get cancellation region for the current cancellation point.
+  OpenMPDirectiveKind getCancelRegion() const { return CancelRegion; }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPCancellationPointDirectiveClass;
+  }
+};
+
+/// \brief This represents '#pragma omp cancel' directive.
+///
+/// \code
+/// #pragma omp cancel for
+/// \endcode
+///
+/// In this example a cancel is created for innermost 'for' region.
+class OMPCancelDirective : public OMPExecutableDirective {
+  friend class ASTStmtReader;
+  OpenMPDirectiveKind CancelRegion;
+  /// \brief Build directive with the given start and end location.
+  ///
+  /// \param StartLoc Starting location of the directive kind.
+  /// \param EndLoc Ending location of the directive.
+  ///
+  OMPCancelDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+      : OMPExecutableDirective(this, OMPCancelDirectiveClass, OMPD_cancel,
+                               StartLoc, EndLoc, 0, 0),
+        CancelRegion(OMPD_unknown) {}
+
+  /// \brief Build an empty directive.
+  ///
+  explicit OMPCancelDirective()
+      : OMPExecutableDirective(this, OMPCancelDirectiveClass, OMPD_cancel,
+                               SourceLocation(), SourceLocation(), 0, 0),
+        CancelRegion(OMPD_unknown) {}
+
+  /// \brief Set cancel region for current cancellation point.
+  /// \param CR Cancellation region.
+  void setCancelRegion(OpenMPDirectiveKind CR) { CancelRegion = CR; }
+
+public:
+  /// \brief Creates directive.
+  ///
+  /// \param C AST context.
+  /// \param StartLoc Starting location of the directive kind.
+  /// \param EndLoc Ending Location of the directive.
+  ///
+  static OMPCancelDirective *Create(const ASTContext &C,
+                                    SourceLocation StartLoc,
+                                    SourceLocation EndLoc,
+                                    OpenMPDirectiveKind CancelRegion);
+
+  /// \brief Creates an empty directive.
+  ///
+  /// \param C AST context.
+  ///
+  static OMPCancelDirective *CreateEmpty(const ASTContext &C, EmptyShell);
+
+  /// \brief Get cancellation region for the current cancellation point.
+  OpenMPDirectiveKind getCancelRegion() const { return CancelRegion; }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPCancelDirectiveClass;
+  }
+};
+
 } // end namespace clang
 
 #endif
diff --git a/include/clang/AST/Type.h b/include/clang/AST/Type.h
index d903b9d..97f1331 100644
--- a/include/clang/AST/Type.h
+++ b/include/clang/AST/Type.h
@@ -1564,6 +1564,7 @@ public:
   bool isRecordType() const;
   bool isClassType() const;
   bool isStructureType() const;
+  bool isObjCBoxableRecordType() const;
   bool isInterfaceType() const;
   bool isStructureOrClassType() const;
   bool isUnionType() const;
diff --git a/include/clang/ASTMatchers/ASTMatchers.h b/include/clang/ASTMatchers/ASTMatchers.h
index 94c77f7..e7a97a7 100644
--- a/include/clang/ASTMatchers/ASTMatchers.h
+++ b/include/clang/ASTMatchers/ASTMatchers.h
@@ -2528,6 +2528,23 @@ AST_MATCHER_P2(DeclStmt, containsDeclaration, unsigned, N,
   return InnerMatcher.matches(**Iterator, Finder, Builder);
 }
 
+/// \brief Matches a C++ catch statement that has a catch-all handler.
+///
+/// Given
+/// \code
+///   try {
+///     // ...
+///   } catch (int) {
+///     // ...
+///   } catch (...) {
+///     // ...
+///   }
+/// /endcode
+/// catchStmt(isCatchAll()) matches catch(...) but not catch(int).
+AST_MATCHER(CXXCatchStmt, isCatchAll) {
+  return Node.getExceptionDecl() == nullptr;
+}
+
 /// \brief Matches a constructor initializer.
 ///
 /// Given
diff --git a/include/clang/Analysis/Analyses/FormatString.h b/include/clang/Analysis/Analyses/FormatString.h
index 2e8058da..4471311 100644
--- a/include/clang/Analysis/Analyses/FormatString.h
+++ b/include/clang/Analysis/Analyses/FormatString.h
@@ -40,6 +40,7 @@ public:
   void clear() { flag = false; }
   void setPosition(const char *position) {
     assert(position);
+    flag = true;
     this->position = position;
   }
   const char *getPosition() const {
@@ -435,12 +436,14 @@ class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
   OptionalFlag HasSpacePrefix; // ' '
   OptionalFlag HasAlternativeForm; // '#'
   OptionalFlag HasLeadingZeroes; // '0'
+  OptionalFlag HasObjCTechnicalTerm; // '[tt]'
   OptionalAmount Precision;
 public:
   PrintfSpecifier() :
     FormatSpecifier(/* isPrintf = */ true),
     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
-    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
+    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0"),
+    HasObjCTechnicalTerm("tt") {}
 
   static PrintfSpecifier Parse(const char *beg, const char *end);
 
@@ -449,29 +452,26 @@ public:
     CS = cs;
   }
   void setHasThousandsGrouping(const char *position) {
-    HasThousandsGrouping = true;
     HasThousandsGrouping.setPosition(position);
   }
   void setIsLeftJustified(const char *position) {
-    IsLeftJustified = true;
     IsLeftJustified.setPosition(position);
   }
   void setHasPlusPrefix(const char *position) {
-    HasPlusPrefix = true;
     HasPlusPrefix.setPosition(position);
   }
   void setHasSpacePrefix(const char *position) {
-    HasSpacePrefix = true;
     HasSpacePrefix.setPosition(position);
   }
   void setHasAlternativeForm(const char *position) {
-    HasAlternativeForm = true;
     HasAlternativeForm.setPosition(position);
   }
   void setHasLeadingZeros(const char *position) {
-    HasLeadingZeroes = true;
     HasLeadingZeroes.setPosition(position);
   }
+  void setHasObjCTechnicalTerm(const char *position) {
+    HasObjCTechnicalTerm.setPosition(position);
+  }
   void setUsesPositionalArg() { UsesPositionalArg = true; }
 
     // Methods for querying the format specifier.
@@ -508,6 +508,7 @@ public:
   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
+  const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; }
   bool usesPositionalArg() const { return UsesPositionalArg; }
 
   /// Changes the specifier and length according to a QualType, retaining any
@@ -565,7 +566,6 @@ public:
     SuppressAssignment("*") {}
 
   void setSuppressAssignment(const char *position) {
-    SuppressAssignment = true;
     SuppressAssignment.setPosition(position);
   }
 
@@ -621,6 +621,15 @@ public:
   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
                                          unsigned specifierLen) {}
 
+  virtual void HandleEmptyObjCModifierFlag(const char *startFlags,
+                                           unsigned flagsLen) {}
+
+  virtual void HandleInvalidObjCModifierFlag(const char *startFlag,
+                                             unsigned flagLen) {}
+
+  virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
+                                            const char *flagsEnd,
+                                            const char *conversionPosition) {}
   // Printf-specific handlers.
 
   virtual bool HandleInvalidPrintfConversionSpecifier(
diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td
index f36f654..2bbce37 100644
--- a/include/clang/Basic/Attr.td
+++ b/include/clang/Basic/Attr.td
@@ -950,30 +950,30 @@ def NonNull : InheritableAttr {
   } }];
   // FIXME: We should merge duplicates into a single nonnull attribute.
   let DuplicatesAllowedWhileMerging = 1;
-  let Documentation = [Undocumented];
+  let Documentation = [NonNullDocs];
 }
 
 def ReturnsNonNull : InheritableAttr {
   let Spellings = [GCC<"returns_nonnull">];
   let Subjects = SubjectList<[ObjCMethod, Function], WarnDiag,
                              "ExpectedFunctionOrMethod">;
-  let Documentation = [Undocumented];
+  let Documentation = [ReturnsNonNullDocs];
 }
 
 // Nullability type attributes.
 def TypeNonNull : TypeAttr {
-  let Spellings = [Keyword<"__nonnull">];
-  let Documentation = [Undocumented];
+  let Spellings = [Keyword<"_Nonnull">];
+  let Documentation = [TypeNonNullDocs];
 }
 
 def TypeNullable : TypeAttr {
-  let Spellings = [Keyword<"__nullable">];
-  let Documentation = [Undocumented];
+  let Spellings = [Keyword<"_Nullable">];
+  let Documentation = [TypeNullableDocs];
 }
 
 def TypeNullUnspecified : TypeAttr {
-  let Spellings = [Keyword<"__null_unspecified">];
-  let Documentation = [Undocumented];
+  let Spellings = [Keyword<"_Null_unspecified">];
+  let Documentation = [TypeNullUnspecifiedDocs];
 }
 
 def AssumeAligned : InheritableAttr {
@@ -1125,6 +1125,12 @@ def ObjCRuntimeName : Attr {
   let Documentation = [ObjCRuntimeNameDocs];
 }
 
+def ObjCBoxable : Attr {
+  let Spellings = [GNU<"objc_boxable">];
+  let Subjects = SubjectList<[Record], ErrorDiag, "ExpectedStructOrUnion">;
+  let Documentation = [Undocumented];
+}
+
 def OptimizeNone : InheritableAttr {
   let Spellings = [GNU<"optnone">, CXX11<"clang", "optnone">];
   let Subjects = SubjectList<[Function, ObjCMethod]>;
diff --git a/include/clang/Basic/AttrDocs.td b/include/clang/Basic/AttrDocs.td
index 9314c44..107458e 100644
--- a/include/clang/Basic/AttrDocs.td
+++ b/include/clang/Basic/AttrDocs.td
@@ -181,6 +181,11 @@ to enforce the provided alignment assumption.
 def EnableIfDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
+.. Note:: Some features of this attribute are experimental. The meaning of
+  multiple enable_if attributes on a single declaration is subject to change in
+  a future version of clang. Also, the ABI is not standardized and the name
+  mangling may change in future versions. To avoid that, use asm labels.
+
 The ``enable_if`` attribute can be placed on function declarations to control
 which overload is selected based on the values of the function's arguments.
 When combined with the ``overloadable`` attribute, this feature is also
@@ -1448,3 +1453,114 @@ private address space. Kernel function arguments of a pointer or an array type
 cannot point to the private address space.
   }];
 }
+
+def NullabilityDocs : DocumentationCategory<"Nullability Attributes"> {
+  let Content = [{
+Whether a particular pointer may be "null" is an important concern when working with pointers in the C family of languages. The various nullability attributes indicate whether a particular pointer can be null or not, which makes APIs more expressive and can help static analysis tools identify bugs involving null pointers. Clang supports several kinds of nullability attributes: the ``nonnull`` and ``returns_nonnull`` attributes indicate which function or method parameters and result types can never be null, while nullability type qualifiers indicate which pointer types can be null (``_Nullable``) or cannot be null (``_Nonnull``). 
+
+The nullability (type) qualifiers express whether a value of a given pointer type can be null (the ``_Nullable`` qualifier), doesn't have a defined meaning for null (the ``_Nonnull`` qualifier), or for which the purpose of null is unclear (the ``_Null_unspecified`` qualifier). Because nullability qualifiers are expressed within the type system, they are more general than the ``nonnull`` and ``returns_nonnull`` attributes, allowing one to express (for example) a nullable pointer to an array of nonnull pointers. Nullability qualifiers are written to the right of the pointer to which they apply. For example:
+
+  .. code-block:: c
+
+    // No meaningful result when 'ptr' is null (here, it happens to be undefined behavior).
+    int fetch(int * _Nonnull ptr) { return *ptr; }
+
+    // 'ptr' may be null.
+    int fetch_or_zero(int * _Nullable ptr) {
+      return ptr ? *ptr : 0;
+    }
+
+    // A nullable pointer to non-null pointers to const characters.
+    const char *join_strings(const char * _Nonnull * _Nullable strings, unsigned n);
+
+In Objective-C, there is an alternate spelling for the nullability qualifiers that can be used in Objective-C methods and properties using context-sensitive, non-underscored keywords. For example:
+
+  .. code-block:: objective-c
+
+    @interface NSView : NSResponder
+      - (nullable NSView *)ancestorSharedWithView:(nonnull NSView *)aView;
+      @property (assign, nullable) NSView *superview;
+      @property (readonly, nonnull) NSArray *subviews;
+    @end
+  }];
+}
+
+def TypeNonNullDocs : Documentation {
+  let Category = NullabilityDocs;
+  let Heading = "_Nonnull";
+  let Content = [{
+The ``_Nonnull`` nullability qualifier indicates that null is not a meaningful value for a value of the ``_Nonnull`` pointer type. For example, given a declaration such as:
+
+  .. code-block:: c
+
+    int fetch(int * _Nonnull ptr);
+
+a caller of ``fetch`` should not provide a null value, and the compiler will produce a warning if it sees a literal null value passed to ``fetch``. Note that, unlike the declaration attribute ``nonnull``, the presence of ``_Nonnull`` does not imply that passing null is undefined behavior: ``fetch`` is free to consider null undefined behavior or (perhaps for backward-compatibility reasons) defensively handle null.
+  }];
+}
+
+def TypeNullableDocs : Documentation {
+  let Category = NullabilityDocs;
+  let Heading = "_Nullable";
+  let Content = [{
+The ``_Nullable`` nullability qualifier indicates that a value of the ``_Nullable`` pointer type can be null. For example, given:
+
+  .. code-block:: c
+
+    int fetch_or_zero(int * _Nullable ptr);
+
+a caller of ``fetch_or_zero`` can provide null. 
+  }];
+}
+
+def TypeNullUnspecifiedDocs : Documentation {
+  let Category = NullabilityDocs;
+  let Heading = "_Null_unspecified";
+  let Content = [{
+The ``_Null_unspecified`` nullability qualifier indicates that neither the ``_Nonnull`` nor ``_Nullable`` qualifiers make sense for a particular pointer type. It is used primarily to indicate that the role of null with specific pointers in a nullability-annotated header is unclear, e.g., due to overly-complex implementations or historical factors with a long-lived API.
+  }];
+}
+
+def NonNullDocs : Documentation {
+  let Category = NullabilityDocs;
+  let Heading = "nonnull";
+  let Content = [{
+The ``nonnull`` attribute indicates that some function parameters must not be null, and can be used in several different ways. It's original usage (`from GCC <https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes>`_) is as a function (or Objective-C method) attribute that specifies which parameters of the function are nonnull in a comma-separated list. For example:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest, const void *src, size_t len)
+                    __attribute__((nonnull (1, 2)));
+
+Here, the ``nonnull`` attribute indicates that parameters 1 and 2
+cannot have a null value. Omitting the parenthesized list of parameter indices means that all parameters of pointer type cannot be null:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest, const void *src, size_t len)
+                    __attribute__((nonnull));
+
+Clang also allows the ``nonnull`` attribute to be placed directly on a function (or Objective-C method) parameter, eliminating the need to specify the parameter index ahead of type. For example:
+
+  .. code-block:: c
+
+    extern void * my_memcpy (void *dest __attribute__((nonnull)),
+                             const void *src __attribute__((nonnull)), size_t len);
+
+Note that the ``nonnull`` attribute indicates that passing null to a non-null parameter is undefined behavior, which the optimizer may take advantage of to, e.g., remove null checks. The ``_Nonnull`` type qualifier indicates that a pointer cannot be null in a more general manner (because it is part of the type system) and does not imply undefined behavior, making it more widely applicable.
+  }];
+}
+
+def ReturnsNonNullDocs : Documentation {
+  let Category = NullabilityDocs;
+  let Heading = "returns_nonnull";
+  let Content = [{
+The ``returns_nonnull`` attribute indicates that a particular function (or Objective-C method) always returns a non-null pointer. For example, a particular system ``malloc`` might be defined to terminate a process when memory is not available rather than returning a null pointer:
+
+  .. code-block:: c
+
+    extern void * malloc (size_t size) __attribute__((returns_nonnull));
+
+The ``returns_nonnull`` attribute implies that returning a null pointer is undefined behavior, which the optimizer may take advantage of. The ``_Nonnull`` type qualifier indicates that a pointer cannot be null in a more general manner (because it is part of the type system) and does not imply undefined behavior, making it more widely applicable
+}];
+}
diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def
index 0610d47..c9cdb4b 100644
--- a/include/clang/Basic/BuiltinsARM.def
+++ b/include/clang/Basic/BuiltinsARM.def
@@ -105,10 +105,10 @@ LANGBUILTIN(__dmb, "vUi", "nc", ALL_MS_LANGUAGES)
 LANGBUILTIN(__dsb, "vUi", "nc", ALL_MS_LANGUAGES)
 LANGBUILTIN(__isb, "vUi", "nc", ALL_MS_LANGUAGES)
 LANGBUILTIN(__ldrexd, "WiWiCD*", "", ALL_MS_LANGUAGES)
-LANGBUILTIN(_MoveFromCoprocessor, "UiUiUiUiUiUi", "", ALL_MS_LANGUAGES)
-LANGBUILTIN(_MoveFromCoprocessor2, "UiUiUiUiUiUi", "", ALL_MS_LANGUAGES)
-LANGBUILTIN(_MoveToCoprocessor, "vUiUiUiUiUiUi", "", ALL_MS_LANGUAGES)
-LANGBUILTIN(_MoveToCoprocessor2, "vUiUiUiUiUiUi", "", ALL_MS_LANGUAGES)
+LANGBUILTIN(_MoveFromCoprocessor, "UiIUiIUiIUiIUiIUi", "", ALL_MS_LANGUAGES)
+LANGBUILTIN(_MoveFromCoprocessor2, "UiIUiIUiIUiIUiIUi", "", ALL_MS_LANGUAGES)
+LANGBUILTIN(_MoveToCoprocessor, "vUiIUiIUiIUiIUiIUi", "", ALL_MS_LANGUAGES)
+LANGBUILTIN(_MoveToCoprocessor2, "vUiIUiIUiIUiIUiIUi", "", ALL_MS_LANGUAGES)
 
 #undef BUILTIN
 #undef LANGBUILTIN
diff --git a/include/clang/Basic/BuiltinsNVPTX.def b/include/clang/Basic/BuiltinsNVPTX.def
index 9c920dc..970f55f 100644
--- a/include/clang/Basic/BuiltinsNVPTX.def
+++ b/include/clang/Basic/BuiltinsNVPTX.def
@@ -501,7 +501,7 @@ BUILTIN(__nvvm_atom_min_s_ui, "UiUiD*3Ui", "n")
 BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
 BUILTIN(__nvvm_atom_min_g_l, "LiLiD*1Li", "n")
 BUILTIN(__nvvm_atom_min_s_l, "LiLiD*3Li", "n")
-BUILTIN(__nvvm_atom_min_gen_l, "LiLi10D*Li", "n")
+BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
 BUILTIN(__nvvm_atom_min_g_ul, "ULiULiD*1ULi", "n")
 BUILTIN(__nvvm_atom_min_s_ul, "ULiULiD*3ULi", "n")
 BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
diff --git a/include/clang/Basic/BuiltinsPPC.def b/include/clang/Basic/BuiltinsPPC.def
index 6f3bea8..02e5294 100644
--- a/include/clang/Basic/BuiltinsPPC.def
+++ b/include/clang/Basic/BuiltinsPPC.def
@@ -267,6 +267,18 @@ BUILTIN(__builtin_vsx_xsmindp, "ddd", "")
 BUILTIN(__builtin_vsx_xvdivdp, "V2dV2dV2d", "")
 BUILTIN(__builtin_vsx_xvdivsp, "V4fV4fV4f", "")
 
+BUILTIN(__builtin_vsx_xvrdpip, "V2dV2d", "")
+BUILTIN(__builtin_vsx_xvrspip, "V4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvcmpeqdp, "V2ULLiV2dV2d", "")
+BUILTIN(__builtin_vsx_xvcmpeqsp, "V4UiV4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvcmpgedp, "V2ULLiV2dV2d", "")
+BUILTIN(__builtin_vsx_xvcmpgesp, "V4UiV4fV4f", "")
+
+BUILTIN(__builtin_vsx_xvcmpgtdp, "V2ULLiV2dV2d", "")
+BUILTIN(__builtin_vsx_xvcmpgtsp, "V4UiV4fV4f", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 00c1340..aaf279f 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -24,6 +24,11 @@
 
 // FIXME: Are these nothrow/const?
 
+// Miscellaneous builtin for checking x86 cpu features.
+// TODO: Make this somewhat generic so that other backends
+// can use it?
+BUILTIN(__builtin_cpu_supports, "bcC*", "nc")
+
 // 3DNow!
 //
 BUILTIN(__builtin_ia32_femms, "v", "")
@@ -651,6 +656,12 @@ BUILTIN(__builtin_ia32_wrfsbase64, "vULLi", "")
 BUILTIN(__builtin_ia32_wrgsbase32, "vUi", "")
 BUILTIN(__builtin_ia32_wrgsbase64, "vULLi", "")
 
+// FXSR
+BUILTIN(__builtin_ia32_fxrstor, "vv*", "")
+BUILTIN(__builtin_ia32_fxrstor64, "vv*", "")
+BUILTIN(__builtin_ia32_fxsave, "vv*", "")
+BUILTIN(__builtin_ia32_fxsave64, "vv*", "")
+
 // ADX
 BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "")
 BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "")
@@ -722,12 +733,72 @@ BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "")
 BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "")
 BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "")
 BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "")
-BUILTIN(__builtin_ia32_vfmaddpd512_mask,  "V8dV8dV8dV8dUcIi", "")
-BUILTIN(__builtin_ia32_vfmsubpd512_mask,  "V8dV8dV8dV8dUcIi", "")
-BUILTIN(__builtin_ia32_vfnmaddpd512_mask, "V8dV8dV8dV8dUcIi", "")
-BUILTIN(__builtin_ia32_vfmaddps512_mask,  "V16fV16fV16fV16fUsIi", "")
-BUILTIN(__builtin_ia32_vfmsubps512_mask,  "V16fV16fV16fV16fUsIi", "")
-BUILTIN(__builtin_ia32_vfnmaddps512_mask, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddpd128_mask,     "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd128_mask3,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd128_maskz,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd256_mask,     "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd256_mask3,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd256_maskz,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddpd512_mask,     "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddpd512_mask3,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddpd512_maskz,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddps128_mask,     "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps128_mask3,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps128_maskz,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps256_mask,     "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps256_mask3,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps256_maskz,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddps512_mask,     "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddps512_mask3,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddps512_maskz,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd128_mask,  "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd128_mask3, "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd128_maskz, "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd256_mask,  "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd256_mask3, "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd256_maskz, "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd512_mask,  "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubps128_mask,  "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps128_mask3, "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps128_maskz, "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps256_mask,  "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps256_mask3, "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps256_maskz, "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmaddsubps512_mask,  "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmsubpd128_mask3,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmsubpd256_mask3,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmsubpd512_mask3,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmsubps128_mask3,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmsubps256_mask3,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmsubps512_mask3,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd128_mask3, "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd256_mask3, "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfmsubaddps128_mask3, "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddps256_mask3, "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfnmaddpd128_mask,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfnmaddpd256_mask,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfnmaddpd512_mask,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfnmaddps128_mask,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfnmaddps256_mask,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfnmaddps512_mask,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfnmsubpd128_mask,    "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd128_mask3,   "V2dV2dV2dV2dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd256_mask,    "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd256_mask3,   "V4dV4dV4dV4dUc", "")
+BUILTIN(__builtin_ia32_vfnmsubpd512_mask,    "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfnmsubpd512_mask3,   "V8dV8dV8dV8dUcIi", "")
+BUILTIN(__builtin_ia32_vfnmsubps128_mask,    "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps128_mask3,   "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps256_mask,    "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps256_mask3,   "V8fV8fV8fV8fUc", "")
+BUILTIN(__builtin_ia32_vfnmsubps512_mask,    "V16fV16fV16fV16fUsIi", "")
+BUILTIN(__builtin_ia32_vfnmsubps512_mask3,   "V16fV16fV16fV16fUsIi", "")
 
 // XOP
 BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "")
@@ -1052,5 +1123,39 @@ BUILTIN(__builtin_ia32_orpd256_mask, "V4dV4dV4dV4dUc", "")
 BUILTIN(__builtin_ia32_orpd128_mask, "V2dV2dV2dV2dUc", "")
 BUILTIN(__builtin_ia32_orps256_mask, "V8fV8fV8fV8fUc", "")
 BUILTIN(__builtin_ia32_orps128_mask, "V4fV4fV4fV4fUc", "")
+BUILTIN(__builtin_ia32_blendmb_512_mask, "V64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_blendmw_512_mask, "V32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_pabsb512_mask, "V64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_pabsw512_mask, "V32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_packssdw512_mask, "V32sV16iV16iV32sUi", "")
+BUILTIN(__builtin_ia32_packsswb512_mask, "V64cV32sV32sV64cULLi", "")
+BUILTIN(__builtin_ia32_packusdw512_mask, "V32sV16iV16iV32sUi", "")
+BUILTIN(__builtin_ia32_packuswb512_mask, "V64cV32sV32sV64cULLi", "")
+BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_pavgb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_pavgw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_pmaxsb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_pmaxsw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_pmaxub512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_pmaxuw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_pminsb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_pminsw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_pminub512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_pminuw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_pshufb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "")
+BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_vpermi2varhi512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_vpermt2varhi512_mask, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_vpermt2varhi512_maskz, "V32sV32sV32sV32sUi", "")
+BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "")
+BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "")
+BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "")
+BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "")
 
 #undef BUILTIN
diff --git a/include/clang/Basic/Diagnostic.h b/include/clang/Basic/Diagnostic.h
index 0f3831d..3b7c282 100644
--- a/include/clang/Basic/Diagnostic.h
+++ b/include/clang/Basic/Diagnostic.h
@@ -18,6 +18,7 @@
 #include "clang/Basic/DiagnosticIDs.h"
 #include "clang/Basic/DiagnosticOptions.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -1107,6 +1108,13 @@ inline const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
   return DB;
 }
 
+/// A nullability kind paired with a bit indicating whether it used a
+/// context-sensitive keyword.
+typedef std::pair<NullabilityKind, bool> DiagNullabilityKind;
+
+const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
+                                    DiagNullabilityKind nullability);
+
 inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc,
                                                    unsigned DiagID) {
   assert(CurDiagID == ~0U && "Multiple diagnostics in flight at once!");
diff --git a/include/clang/Basic/DiagnosticCommonKinds.td b/include/clang/Basic/DiagnosticCommonKinds.td
index 82718d9..24813c6 100644
--- a/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/include/clang/Basic/DiagnosticCommonKinds.td
@@ -102,31 +102,21 @@ def err_enum_template : Error<"enumeration cannot be a template">;
 let CategoryName = "Nullability Issue" in {
 
 def warn_nullability_duplicate : Warning<
-  "duplicate nullability specifier "
-  "'%select{__|}1%select{nonnull|nullable|null_unspecified}0'">,
+  "duplicate nullability specifier %0">,
   InGroup<Nullability>;
   
 def warn_conflicting_nullability_attr_overriding_ret_types : Warning<
-  "conflicting nullability specifier on return types, "
-  "'%select{%select{__|}1nonnull|"
-  "%select{__|}1nullable|%select{__|}1null_unspecified}0' "
-  "conflicts with existing specifier '%select{%select{__|}3nonnull|"
-  "%select{__|}3nullable|%select{__|}3null_unspecified}2'">,
+  "conflicting nullability specifier on return types, %0 "
+  "conflicts with existing specifier %1">,
   InGroup<Nullability>;
 
 def warn_conflicting_nullability_attr_overriding_param_types : Warning<
-  "conflicting nullability specifier on parameter types, "
-  "'%select{%select{__|}1nonnull|"
-  "%select{__|}1nullable|%select{__|}1null_unspecified}0' "
-  "conflicts with existing specifier '%select{%select{__|}3nonnull|"
-  "%select{__|}3nullable|%select{__|}3null_unspecified}2'">,
+  "conflicting nullability specifier on parameter types, %0 "
+  "conflicts with existing specifier %1">,
   InGroup<Nullability>;
 
 def err_nullability_conflicting : Error<
-  "nullability specifier "
-  "'%select{__|}1%select{nonnull|nullable|null_unspecified}0' conflicts with "
-  "existing specifier '%select{__|}3%select{nonnull|nullable|"
-  "null_unspecified}2'">;
+  "nullability specifier %0 conflicts with existing specifier %1">;
 
 }
 
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 803203c..7502194 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -426,6 +426,7 @@ def warn_redecl_library_builtin : Warning<
   InGroup<DiagGroup<"incompatible-library-redeclaration">>;
 def err_builtin_definition : Error<"definition of builtin function %0">;
 def err_arm_invalid_specialreg : Error<"invalid special register for builtin">;
+def err_invalid_cpu_supports : Error<"invalid cpu feature string for builtin">;
 def warn_builtin_unknown : Warning<"use of unknown builtin %0">,
   InGroup<ImplicitFunctionDeclare>, DefaultError;
 def warn_dyn_class_memaccess : Warning<
@@ -1050,6 +1051,7 @@ def ext_friend_tag_redecl_outside_namespace : ExtWarn<
   "unqualified friend declaration referring to type outside of the nearest "
   "enclosing namespace is a Microsoft extension; add a nested name specifier">,
   InGroup<Microsoft>;
+def err_pure_friend : Error<"friend declaration cannot have a pure-specifier">;
 
 def err_invalid_member_in_interface : Error<
   "%select{data member |non-public member function |static member function |"
@@ -1262,6 +1264,8 @@ def ext_mutable_reference : ExtWarn<
 def err_mutable_const : Error<"'mutable' and 'const' cannot be mixed">;
 def err_mutable_nonmember : Error<
   "'mutable' can only be applied to member variables">;
+def err_virtual_in_union : Error<
+  "unions cannot have virtual functions">;
 def err_virtual_non_function : Error<
   "'virtual' can only appear on non-static member functions">;
 def err_virtual_out_of_class : Error<
@@ -2081,12 +2085,16 @@ def err_attr_objc_ownership_redundant : Error<
   "the type %0 is already explicitly ownership-qualified">;
 def err_undeclared_nsnumber : Error<
   "NSNumber must be available to use Objective-C literals">;
+def err_undeclared_nsvalue : Error<
+  "NSValue must be available to use Objective-C boxed expressions">;
 def err_invalid_nsnumber_type : Error<
   "%0 is not a valid literal type for NSNumber">;
 def err_undeclared_nsstring : Error<
   "cannot box a string value because NSString has not been declared">;
 def err_objc_illegal_boxed_expression_type : Error<
   "illegal type %0 used in a boxed expression">;
+def err_objc_non_trivially_copyable_boxed_expression_type : Error<
+  "non-trivially copyable type %0 cannot be used in a boxed expression">;
 def err_objc_incomplete_boxed_expression_type : Error<
   "incomplete type %0 used in a boxed expression">;
 def err_undeclared_nsarray : Error<
@@ -2190,7 +2198,7 @@ def err_attribute_invalid_on_stmt : Error<
   "%0 attribute cannot be applied to a statement">;
 def warn_declspec_attribute_ignored : Warning<
   "attribute %0 is ignored, place it after "
-  "\"%select{class|struct|union|interface|enum}1\" to apply attribute to "
+  "\"%select{class|struct|interface|union|enum}1\" to apply attribute to "
   "type declaration">, InGroup<IgnoredAttributes>;
 def warn_attribute_precede_definition : Warning<
   "attribute declaration must precede definition">,
@@ -4690,13 +4698,15 @@ def ext_sizeof_alignof_void_type : Extension<
   "invalid application of '%select{sizeof|alignof|vec_step}0' to a void "
   "type">, InGroup<PointerArith>;
 def err_opencl_sizeof_alignof_type : Error<
-  "invalid application of '%select{sizeof|alignof|vec_step}0' to a void type">;
+  "invalid application of '%select{sizeof|alignof|vec_step|__builtin_omp_required_simd_align}0' to a void type">;
 def err_sizeof_alignof_incomplete_type : Error<
-  "invalid application of '%select{sizeof|alignof|vec_step}0' to an "
+  "invalid application of '%select{sizeof|alignof|vec_step|__builtin_omp_required_simd_align}0' to an "
   "incomplete type %1">;
 def err_sizeof_alignof_function_type : Error<
-  "invalid application of '%select{sizeof|alignof|vec_step}0' to a "
+  "invalid application of '%select{sizeof|alignof|vec_step|__builtin_omp_required_simd_align}0' to a "
   "function type">;
+def err_openmp_default_simd_align_expr : Error<
+  "invalid application of '__builtin_omp_required_simd_align' to an expression, only type is allowed">;
 def err_sizeof_alignof_bitfield : Error<
   "invalid application of '%select{sizeof|alignof}0' to bit-field">;
 def err_alignof_member_of_incomplete_type : Error<
@@ -6791,6 +6801,15 @@ def warn_format_non_standard_conversion_spec: Warning<
 def warn_printf_ignored_flag: Warning<
   "flag '%0' is ignored when flag '%1' is present">,
   InGroup<Format>;
+def warn_printf_empty_objc_flag: Warning<
+  "missing object format flag">,
+  InGroup<Format>;
+def warn_printf_ObjCflags_without_ObjCConversion: Warning<
+  "object format flags cannot be used with '%0' conversion specifier">,
+  InGroup<Format>;
+def warn_printf_invalid_objc_flag: Warning<
+    "'%0' is not a valid object format flag">,
+    InGroup<Format>;
 def warn_scanf_scanlist_incomplete : Warning<
   "no closing ']' for '%%[' in scanf format string">,
   InGroup<Format>;
@@ -7413,6 +7432,8 @@ def err_omp_unexpected_clause_value : Error<
   "expected %0 in OpenMP clause '%1'">;
 def err_omp_expected_var_name : Error<
   "expected variable name">;
+def err_omp_expected_var_name_or_array_item : Error<
+  "expected variable name, array element or array section">;
 def note_omp_task_predetermined_firstprivate_here : Note<
   "predetermined as a firstprivate in a task construct here">;
 def err_omp_clause_ref_type_arg : Error<
@@ -7601,6 +7622,12 @@ def err_omp_single_copyprivate_with_nowait : Error<
   "the 'copyprivate' clause must not be used with the 'nowait' clause">;
 def note_omp_nowait_clause_here : Note<
   "'nowait' clause is here">;
+def err_omp_wrong_cancel_region : Error<
+  "one of 'for', 'parallel', 'sections' or 'taskgroup' is expected">;
+def err_omp_parent_cancel_region_nowait : Error<
+  "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be nowait">;
+def err_omp_parent_cancel_region_ordered : Error<
+  "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be ordered">;
 } // end of OpenMP category
 
 let CategoryName = "Related Result Type Issue" in {
@@ -7677,28 +7704,21 @@ def warn_profile_data_unprofiled : Warning<
 let CategoryName = "Nullability Issue" in {
 
 def warn_mismatched_nullability_attr : Warning<
-  "nullability specifier "
-  "'%select{__|}1%select{nonnull|nullable|null_unspecified}0' "
-  "conflicts with existing specifier "
-  "'%select{__|}3%select{nonnull|nullable|null_unspecified}2'">,
+  "nullability specifier %0 conflicts with existing specifier %1">,
   InGroup<Nullability>;
 
 def warn_nullability_declspec : Warning<
-  "nullability specifier "
-  "'%select{__nonnull|__nullable|__null_unspecified}0' cannot be applied "
+  "nullability specifier %0 cannot be applied "
   "to non-pointer type %1; did you mean to apply the specifier to the "
   "%select{pointer|block pointer|member pointer|function pointer|"
   "member function pointer}2?">,
   InGroup<NullabilityDeclSpec>,
   DefaultError;
 
-def note_nullability_here : Note<
-  "'%select{__nonnull|__nullable|__null_unspecified}0' specified here">;
+def note_nullability_here : Note<"%0 specified here">;
 
 def err_nullability_nonpointer : Error<
-  "nullability specifier "
-  "'%select{__|}1%select{nonnull|nullable|null_unspecified}0' cannot be applied "
-  "to non-pointer type %2">;
+  "nullability specifier %0 cannot be applied to non-pointer type %1">;
 
 def warn_nullability_lost : Warning<
   "implicit conversion from nullable pointer %0 to non-nullable pointer "
@@ -7706,12 +7726,9 @@ def warn_nullability_lost : Warning<
   InGroup<NullableToNonNullConversion>, DefaultIgnore;
 
 def err_nullability_cs_multilevel : Error<
-  "nullability keyword "
-  "'%select{nonnull|nullable|null_unspecified}0' cannot be applied to "
-  "multi-level pointer type %1">;
+  "nullability keyword %0 cannot be applied to multi-level pointer type %1">;
 def note_nullability_type_specifier : Note<
-  "use nullability type specifier "
-  "'%select{__nonnull|__nullable|__null_unspecified}0' to affect the innermost "
+  "use nullability type specifier %0 to affect the innermost "
   "pointer type of %1">;
 
 def warn_null_resettable_setter : Warning<
@@ -7720,7 +7737,7 @@ def warn_null_resettable_setter : Warning<
 
 def warn_nullability_missing : Warning<
   "%select{pointer|block pointer|member pointer}0 is missing a nullability "
-  "type specifier (__nonnull, __nullable, or __null_unspecified)">,
+  "type specifier (_Nonnull, _Nullable, or _Null_unspecified)">,
   InGroup<NullabilityCompleteness>;
 
 }
diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h
index bc586e4..33a8b74 100644
--- a/include/clang/Basic/IdentifierTable.h
+++ b/include/clang/Basic/IdentifierTable.h
@@ -406,19 +406,6 @@ public:
   virtual IdentifierIterator *getIdentifiers();
 };
 
-/// \brief An abstract class used to resolve numerical identifier
-/// references (meaningful only to some external source) into
-/// IdentifierInfo pointers.
-class ExternalIdentifierLookup {
-public:
-  virtual ~ExternalIdentifierLookup();
-
-  /// \brief Return the identifier associated with the given ID number.
-  ///
-  /// The ID 0 is associated with the NULL identifier.
-  virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0;
-};
-
 /// \brief Implements an efficient mapping from strings to IdentifierInfo nodes.
 ///
 /// This has no other purpose, but this is an extremely performance-critical
diff --git a/include/clang/Basic/LangOptions.def b/include/clang/Basic/LangOptions.def
index bef122f..b27605b 100644
--- a/include/clang/Basic/LangOptions.def
+++ b/include/clang/Basic/LangOptions.def
@@ -130,6 +130,7 @@ COMPATIBLE_LANGOPT(ModulesStrictDeclUse, 1, 0, "require declaration of module us
 BENIGN_LANGOPT(ModulesErrorRecovery, 1, 1, "automatically import modules as needed when performing error recovery")
 BENIGN_LANGOPT(ImplicitModules, 1, 1, "build modules that are not specified via -fmodule-file")
 COMPATIBLE_LANGOPT(ModulesLocalVisibility, 1, 0, "local submodule visibility")
+COMPATIBLE_LANGOPT(ModulesHideInternalLinkage, 1, 1, "hiding non-visible internal linkage declarations from redeclaration lookup")
 COMPATIBLE_LANGOPT(Optimize          , 1, 0, "__OPTIMIZE__ predefined macro")
 COMPATIBLE_LANGOPT(OptimizeSize      , 1, 0, "__OPTIMIZE_SIZE__ predefined macro")
 LANGOPT(Static            , 1, 0, "__STATIC__ predefined macro (as opposed to __DYNAMIC__)")
diff --git a/include/clang/Basic/LangOptions.h b/include/clang/Basic/LangOptions.h
index 84836eb..3c9d23e 100644
--- a/include/clang/Basic/LangOptions.h
+++ b/include/clang/Basic/LangOptions.h
@@ -102,6 +102,8 @@ public:
 
   /// \brief The names of any features to enable in module 'requires' decls
   /// in addition to the hard-coded list in Module.cpp and the target features.
+  ///
+  /// This list is sorted.
   std::vector<std::string> ModuleFeatures;
 
   /// \brief Options for parsing comments.
diff --git a/include/clang/Basic/Module.h b/include/clang/Basic/Module.h
index 7470610..1bc8925 100644
--- a/include/clang/Basic/Module.h
+++ b/include/clang/Basic/Module.h
@@ -66,6 +66,9 @@ public:
   /// \brief The umbrella header or directory.
   llvm::PointerUnion<const DirectoryEntry *, const FileEntry *> Umbrella;
 
+  /// \brief The module signature.
+  uint64_t Signature;
+
   /// \brief The name of the umbrella entry, as written in the module map.
   std::string UmbrellaAsWritten;
   
diff --git a/include/clang/Basic/OpenMPKinds.def b/include/clang/Basic/OpenMPKinds.def
index b09f012..67a5068 100644
--- a/include/clang/Basic/OpenMPKinds.def
+++ b/include/clang/Basic/OpenMPKinds.def
@@ -69,6 +69,9 @@
 #ifndef OPENMP_SCHEDULE_KIND
 #define OPENMP_SCHEDULE_KIND(Name)
 #endif
+#ifndef OPENMP_DEPEND_KIND
+#define OPENMP_DEPEND_KIND(Name)
+#endif
 
 // OpenMP directives.
 OPENMP_DIRECTIVE(threadprivate)
@@ -90,10 +93,12 @@ OPENMP_DIRECTIVE(ordered)
 OPENMP_DIRECTIVE(atomic)
 OPENMP_DIRECTIVE(target)
 OPENMP_DIRECTIVE(teams)
+OPENMP_DIRECTIVE(cancel)
 OPENMP_DIRECTIVE_EXT(parallel_for, "parallel for")
 OPENMP_DIRECTIVE_EXT(parallel_for_simd, "parallel for simd")
 OPENMP_DIRECTIVE_EXT(parallel_sections, "parallel sections")
 OPENMP_DIRECTIVE_EXT(for_simd, "for simd")
+OPENMP_DIRECTIVE_EXT(cancellation_point, "cancellation point")
 
 // OpenMP clauses.
 OPENMP_CLAUSE(if, OMPIfClause)
@@ -123,6 +128,7 @@ OPENMP_CLAUSE(write, OMPWriteClause)
 OPENMP_CLAUSE(update, OMPUpdateClause)
 OPENMP_CLAUSE(capture, OMPCaptureClause)
 OPENMP_CLAUSE(seq_cst, OMPSeqCstClause)
+OPENMP_CLAUSE(depend, OMPDependClause)
 
 // Clauses allowed for OpenMP directive 'parallel'.
 OPENMP_PARALLEL_CLAUSE(if)
@@ -195,6 +201,11 @@ OPENMP_SCHEDULE_KIND(guided)
 OPENMP_SCHEDULE_KIND(auto)
 OPENMP_SCHEDULE_KIND(runtime)
 
+// Static attributes for 'depend' clause.
+OPENMP_DEPEND_KIND(in)
+OPENMP_DEPEND_KIND(out)
+OPENMP_DEPEND_KIND(inout)
+
 // Clauses allowed for OpenMP directive 'parallel for'.
 OPENMP_PARALLEL_FOR_CLAUSE(if)
 OPENMP_PARALLEL_FOR_CLAUSE(num_threads)
@@ -248,6 +259,7 @@ OPENMP_TASK_CLAUSE(firstprivate)
 OPENMP_TASK_CLAUSE(shared)
 OPENMP_TASK_CLAUSE(untied)
 OPENMP_TASK_CLAUSE(mergeable)
+OPENMP_TASK_CLAUSE(depend)
 
 // Clauses allowed for OpenMP directive 'atomic'.
 OPENMP_ATOMIC_CLAUSE(read)
@@ -268,6 +280,7 @@ OPENMP_TEAMS_CLAUSE(firstprivate)
 OPENMP_TEAMS_CLAUSE(shared)
 OPENMP_TEAMS_CLAUSE(reduction)
 
+#undef OPENMP_DEPEND_KIND
 #undef OPENMP_SCHEDULE_KIND
 #undef OPENMP_PROC_BIND_KIND
 #undef OPENMP_DEFAULT_KIND
diff --git a/include/clang/Basic/OpenMPKinds.h b/include/clang/Basic/OpenMPKinds.h
index e2f1151..83939bb 100644
--- a/include/clang/Basic/OpenMPKinds.h
+++ b/include/clang/Basic/OpenMPKinds.h
@@ -62,6 +62,14 @@ enum OpenMPScheduleClauseKind {
   OMPC_SCHEDULE_unknown
 };
 
+/// \brief OpenMP attributes for 'depend' clause.
+enum OpenMPDependClauseKind {
+#define OPENMP_DEPEND_KIND(Name) \
+  OMPC_DEPEND_##Name,
+#include "clang/Basic/OpenMPKinds.def"
+  OMPC_DEPEND_unknown
+};
+
 OpenMPDirectiveKind getOpenMPDirectiveKind(llvm::StringRef Str);
 const char *getOpenMPDirectiveName(OpenMPDirectiveKind Kind);
 
diff --git a/include/clang/Basic/Specifiers.h b/include/clang/Basic/Specifiers.h
index 5ce56c0..d95a77f 100644
--- a/include/clang/Basic/Specifiers.h
+++ b/include/clang/Basic/Specifiers.h
@@ -257,7 +257,8 @@ namespace clang {
   };
 
   /// Retrieve the spelling of the given nullability kind.
-  llvm::StringRef getNullabilitySpelling(NullabilityKind kind);
+  llvm::StringRef getNullabilitySpelling(NullabilityKind kind,
+                                         bool isContextSensitive = false);
 } // end namespace clang
 
 #endif // LLVM_CLANG_BASIC_SPECIFIERS_H
diff --git a/include/clang/Basic/StmtNodes.td b/include/clang/Basic/StmtNodes.td
index 675e91d..9d7b6fb 100644
--- a/include/clang/Basic/StmtNodes.td
+++ b/include/clang/Basic/StmtNodes.td
@@ -205,3 +205,5 @@ def OMPOrderedDirective : DStmt<OMPExecutableDirective>;
 def OMPAtomicDirective : DStmt<OMPExecutableDirective>;
 def OMPTargetDirective : DStmt<OMPExecutableDirective>;
 def OMPTeamsDirective : DStmt<OMPExecutableDirective>;
+def OMPCancellationPointDirective : DStmt<OMPExecutableDirective>;
+def OMPCancelDirective : DStmt<OMPExecutableDirective>;
diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h
index e415733..a3bb535 100644
--- a/include/clang/Basic/TargetInfo.h
+++ b/include/clang/Basic/TargetInfo.h
@@ -70,6 +70,7 @@ protected:
   unsigned char MinGlobalAlign;
   unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
   unsigned short MaxVectorAlign;
+  unsigned short SimdDefaultAlign;
   const char *DescriptionString;
   const char *UserLabelPrefix;
   const char *MCountName;
@@ -393,6 +394,10 @@ public:
 
   /// \brief Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
+  /// \brief Return default simd alignment for the given target. Generally, this
+  /// value is type-specific, but this alignment can be used for most of the
+  /// types for the given target.
+  unsigned getSimdDefaultAlign() const { return SimdDefaultAlign; }
 
   /// \brief Return the size of intmax_t and uintmax_t for this target, in bits.
   unsigned getIntMaxTWidth() const {
@@ -611,6 +616,9 @@ public:
     }
   };
 
+  // Validate the contents of the __builtin_cpu_supports(const char*) argument.
+  virtual bool validateCpuSupports(StringRef Name) const { return false; }
+
   // validateOutputConstraint, validateInputConstraint - Checks that
   // a constraint is valid and provides information about it.
   // FIXME: These should return a real error instead of just true/false.
diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def
index 67b9933..ed2aa82 100644
--- a/include/clang/Basic/TokenKinds.def
+++ b/include/clang/Basic/TokenKinds.def
@@ -503,6 +503,9 @@ ALIAS("read_write", __read_write    , KEYOPENCL)
 KEYWORD(__builtin_astype            , KEYOPENCL)
 KEYWORD(vec_step                    , KEYOPENCL|KEYALTIVEC)
 
+// OpenMP Type Traits
+KEYWORD(__builtin_omp_required_simd_align, KEYALL)
+
 // Borland Extensions.
 KEYWORD(__pascal                    , KEYALL)
 
@@ -549,9 +552,9 @@ ALIAS("__volatile"   , volatile   , KEYALL)
 ALIAS("__volatile__" , volatile   , KEYALL)
 
 // Type nullability.
-KEYWORD(__nonnull                 , KEYALL)
-KEYWORD(__nullable                , KEYALL)
-KEYWORD(__null_unspecified        , KEYALL)
+KEYWORD(_Nonnull                 , KEYALL)
+KEYWORD(_Nullable                , KEYALL)
+KEYWORD(_Null_unspecified        , KEYALL)
 
 // Microsoft extensions which should be disabled in strict conformance mode
 KEYWORD(__ptr64                       , KEYMS)
diff --git a/include/clang/Basic/TypeTraits.h b/include/clang/Basic/TypeTraits.h
index ef84d2b..765246b 100644
--- a/include/clang/Basic/TypeTraits.h
+++ b/include/clang/Basic/TypeTraits.h
@@ -92,7 +92,8 @@ namespace clang {
   enum UnaryExprOrTypeTrait {
     UETT_SizeOf,
     UETT_AlignOf,
-    UETT_VecStep
+    UETT_VecStep,
+    UETT_OpenMPRequiredSimdAlign,
   };
 }
 
diff --git a/include/clang/CodeGen/CodeGenABITypes.h b/include/clang/CodeGen/CodeGenABITypes.h
index 97a9dc8..4e76cd4 100644
--- a/include/clang/CodeGen/CodeGenABITypes.h
+++ b/include/clang/CodeGen/CodeGenABITypes.h
@@ -37,9 +37,11 @@ namespace clang {
 class ASTContext;
 class CXXRecordDecl;
 class CodeGenOptions;
+class CoverageSourceInfo;
 class DiagnosticsEngine;
+class HeaderSearchOptions;
 class ObjCMethodDecl;
-class CoverageSourceInfo;
+class PreprocessorOptions;
 
 namespace CodeGen {
 class CGFunctionInfo;
@@ -74,6 +76,8 @@ private:
   /// CodeGenModule and otherwise not used. More specifically, it is
   /// not used in ABI type generation, so none of the options matter.
   CodeGenOptions *CGO;
+  HeaderSearchOptions *HSO;
+  PreprocessorOptions *PPO;
 
   /// The CodeGenModule we use get to the CodeGenTypes object.
   CodeGen::CodeGenModule *CGM;
diff --git a/include/clang/CodeGen/ModuleBuilder.h b/include/clang/CodeGen/ModuleBuilder.h
index 8facc3c..52497d9 100644
--- a/include/clang/CodeGen/ModuleBuilder.h
+++ b/include/clang/CodeGen/ModuleBuilder.h
@@ -26,6 +26,8 @@ namespace clang {
   class DiagnosticsEngine;
   class CoverageSourceInfo;
   class LangOptions;
+  class HeaderSearchOptions;
+  class PreprocessorOptions;
   class CodeGenOptions;
   class Decl;
 
@@ -42,6 +44,8 @@ namespace clang {
   /// the allocated CodeGenerator instance.
   CodeGenerator *CreateLLVMCodeGen(DiagnosticsEngine &Diags,
                                    const std::string &ModuleName,
+                                   const HeaderSearchOptions &HeaderSearchOpts,
+                                   const PreprocessorOptions &PreprocessorOpts,
                                    const CodeGenOptions &CGO,
                                    llvm::LLVMContext& C,
                                    CoverageSourceInfo *CoverageInfo = nullptr);
diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td
index f2ef71e..60cc6ec 100644
--- a/include/clang/Driver/CC1Options.td
+++ b/include/clang/Driver/CC1Options.td
@@ -369,6 +369,10 @@ def fmodules_local_submodule_visibility :
   Flag<["-"], "fmodules-local-submodule-visibility">,
   HelpText<"Enforce name visibility rules across submodules of the same "
            "top-level module.">;
+def fno_modules_hide_internal_linkage :
+  Flag<["-"], "fno-modules-hide-internal-linkage">,
+  HelpText<"Make all declarations visible to redeclaration lookup, "
+           "even if they have internal linkage.">;
 def fconcepts_ts : Flag<["-"], "fconcepts-ts">,
   HelpText<"Enable C++ Extensions for Concepts.">;
 
diff --git a/include/clang/Driver/CLCompatOptions.td b/include/clang/Driver/CLCompatOptions.td
index 01913be..cf6b76b 100644
--- a/include/clang/Driver/CLCompatOptions.td
+++ b/include/clang/Driver/CLCompatOptions.td
@@ -52,16 +52,18 @@ class CLRemainingArgs<string name> : Option<["/", "-"], name,
 // (We don't put any of these in cl_compile_Group as the options they alias are
 // already in the right group.)
 
-def _SLASH_C : CLFlag<"C">, HelpText<"Don't discard comments when preprocessing">,
-  Alias<C>;
+def _SLASH_C : CLFlag<"C">,
+  HelpText<"Don't discard comments when preprocessing">, Alias<C>;
 def _SLASH_c : CLFlag<"c">, HelpText<"Compile only">, Alias<c>;
 def _SLASH_D : CLJoinedOrSeparate<"D">, HelpText<"Define macro">,
   MetaVarName<"<macro[=value]>">, Alias<D>;
 def _SLASH_E : CLFlag<"E">, HelpText<"Preprocess to stdout">, Alias<E>;
 def _SLASH_fp_except : CLFlag<"fp:except">, HelpText<"">, Alias<ftrapping_math>;
-def _SLASH_fp_except_ : CLFlag<"fp:except-">, HelpText<"">, Alias<fno_trapping_math>;
+def _SLASH_fp_except_ : CLFlag<"fp:except-">,
+  HelpText<"">, Alias<fno_trapping_math>;
 def _SLASH_fp_fast : CLFlag<"fp:fast">, HelpText<"">, Alias<ffast_math>;
-def _SLASH_fp_precise : CLFlag<"fp:precise">, HelpText<"">, Alias<fno_fast_math>;
+def _SLASH_fp_precise : CLFlag<"fp:precise">,
+  HelpText<"">, Alias<fno_fast_math>;
 def _SLASH_fp_strict : CLFlag<"fp:strict">, HelpText<"">, Alias<fno_fast_math>;
 def _SLASH_GA : CLFlag<"GA">, Alias<ftlsmodel_EQ>, AliasArgs<["local-exec"]>,
   HelpText<"Assume thread-local variables are defined in the executable">;
@@ -73,11 +75,13 @@ def _SLASH_Gs : CLJoined<"Gs">, HelpText<"Set stack probe size">,
   Alias<mstack_probe_size>;
 def _SLASH_Gy : CLFlag<"Gy">, HelpText<"Put each function in its own section">,
   Alias<ffunction_sections>;
-def _SLASH_Gy_ : CLFlag<"Gy-">, HelpText<"Don't put each function in its own section">,
+def _SLASH_Gy_ : CLFlag<"Gy-">,
+  HelpText<"Don't put each function in its own section">,
   Alias<fno_function_sections>;
 def _SLASH_Gw : CLFlag<"Gw">, HelpText<"Put each data item in its own section">,
   Alias<fdata_sections>;
-def _SLASH_Gw_ : CLFlag<"Gw-">, HelpText<"Don't put each data item in its own section">,
+def _SLASH_Gw_ : CLFlag<"Gw-">,
+  HelpText<"Don't put each data item in its own section">,
   Alias<fno_data_sections>;
 def _SLASH_help : CLFlag<"help">, Alias<help>,
   HelpText<"Display available options">;
@@ -109,21 +113,15 @@ def _SLASH_Oy_ : CLFlag<"Oy-">, HelpText<"Disable frame pointer omission">,
   Alias<fno_omit_frame_pointer>;
 def _SLASH_QUESTION : CLFlag<"?">, Alias<help>,
   HelpText<"Display available options">;
+def _SLASH_Qvec : CLFlag<"Qvec">,
+  HelpText<"Enable the loop vectorization passes">, Alias<fvectorize>;
+def _SLASH_Qvec_ : CLFlag<"Qvec-">,
+  HelpText<"Disable the loop vectorization passes">, Alias<fno_vectorize>;
 def _SLASH_showIncludes : CLFlag<"showIncludes">,
   HelpText<"Print info about included files to stderr">,
   Alias<show_includes>;
 def _SLASH_U : CLJoinedOrSeparate<"U">, HelpText<"Undefine macro">,
   MetaVarName<"<macro>">, Alias<U>;
-def _SLASH_vmb : CLFlag<"vmb">,
-  HelpText<"Use a best-case representation method for member pointers">;
-def _SLASH_vmg : CLFlag<"vmg">,
-  HelpText<"Use a most-general representation for member pointers">;
-def _SLASH_vms : CLFlag<"vms">,
-  HelpText<"Set the default most-general representation to single inheritance">;
-def _SLASH_vmm : CLFlag<"vmm">,
-  HelpText<"Set the default most-general representation to multiple inheritance">;
-def _SLASH_vmv : CLFlag<"vmv">,
-  HelpText<"Set the default most-general representation to virtual inheritance">;
 def _SLASH_W0 : CLFlag<"W0">, HelpText<"Disable all warnings">, Alias<w>;
 def _SLASH_W1 : CLFlag<"W1">, HelpText<"Enable -Wall">, Alias<Wall>;
 def _SLASH_W2 : CLFlag<"W2">, HelpText<"Enable -Wall">, Alias<Wall>;
@@ -181,7 +179,8 @@ def _SLASH_arch : CLCompileJoined<"arch:">,
   HelpText<"Set architecture for code generation">;
 
 def _SLASH_M_Group : OptionGroup<"</M group>">, Group<cl_compile_Group>;
-def _SLASH_volatile_Group : OptionGroup<"</volatile group>">, Group<cl_compile_Group>;
+def _SLASH_volatile_Group : OptionGroup<"</volatile group>">,
+  Group<cl_compile_Group>;
 
 def _SLASH_EH : CLJoined<"EH">, HelpText<"Exception handling model">;
 def _SLASH_EP : CLFlag<"EP">,
@@ -220,22 +219,30 @@ def _SLASH_o : CLJoinedOrSeparate<"o">,
   HelpText<"Set output file or directory (ends in / or \\)">,
   MetaVarName<"<file or directory>">;
 def _SLASH_P : CLFlag<"P">, HelpText<"Preprocess to file">;
-def _SLASH_Qvec : CLFlag<"Qvec">,
-                  HelpText<"Enable the loop vectorization passes">,
-                  Alias<fvectorize>;
-def _SLASH_Qvec_ : CLFlag<"Qvec-">,
-                   HelpText<"Disable the loop vectorization passes">,
-                   Alias<fno_vectorize>;
 def _SLASH_Tc : CLCompileJoinedOrSeparate<"Tc">,
   HelpText<"Specify a C source file">, MetaVarName<"<filename>">;
 def _SLASH_TC : CLCompileFlag<"TC">, HelpText<"Treat all source files as C">;
 def _SLASH_Tp : CLCompileJoinedOrSeparate<"Tp">,
   HelpText<"Specify a C++ source file">, MetaVarName<"<filename>">;
 def _SLASH_TP : CLCompileFlag<"TP">, HelpText<"Treat all source files as C++">;
-def _SLASH_volatile_iso : Option<["/", "-"], "volatile:iso", KIND_FLAG>, Group<_SLASH_volatile_Group>,
-  Flags<[CLOption, DriverOption]>, HelpText<"Volatile loads and stores have standard semantics">;
-def _SLASH_volatile_ms  : Option<["/", "-"], "volatile:ms", KIND_FLAG>, Group<_SLASH_volatile_Group>,
-  Flags<[CLOption, DriverOption]>, HelpText<"Volatile loads and stores have acquire and release semantics">;
+def _SLASH_volatile_iso : Option<["/", "-"], "volatile:iso", KIND_FLAG>,
+  Group<_SLASH_volatile_Group>, Flags<[CLOption, DriverOption]>,
+  HelpText<"Volatile loads and stores have standard semantics">;
+def _SLASH_vmb : CLFlag<"vmb">,
+  HelpText<"Use a best-case representation method for member pointers">;
+def _SLASH_vmg : CLFlag<"vmg">,
+  HelpText<"Use a most-general representation for member pointers">;
+def _SLASH_vms : CLFlag<"vms">,
+  HelpText<"Set the default most-general representation to single inheritance">;
+def _SLASH_vmm : CLFlag<"vmm">,
+  HelpText<"Set the default most-general representation to "
+           "multiple inheritance">;
+def _SLASH_vmv : CLFlag<"vmv">,
+  HelpText<"Set the default most-general representation to "
+           "virtual inheritance">;
+def _SLASH_volatile_ms  : Option<["/", "-"], "volatile:ms", KIND_FLAG>,
+  Group<_SLASH_volatile_Group>, Flags<[CLOption, DriverOption]>,
+  HelpText<"Volatile loads and stores have acquire and release semantics">;
 
 // Ignored:
 
diff --git a/include/clang/Driver/Compilation.h b/include/clang/Driver/Compilation.h
index 5574e2c..f0c1bed 100644
--- a/include/clang/Driver/Compilation.h
+++ b/include/clang/Driver/Compilation.h
@@ -169,8 +169,9 @@ public:
   ///
   /// \param FailingCommands - For non-zero results, this will be a vector of
   /// failing commands and their associated result code.
-  void ExecuteJob(const Job &J,
-     SmallVectorImpl< std::pair<int, const Command *> > &FailingCommands) const;
+  void ExecuteJobs(
+      const JobList &Jobs,
+      SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) const;
 
   /// initCompilationForDiagnostics - Remove stale state and suppress output
   /// so compilation can be reexecuted to generate additional diagnostic
diff --git a/include/clang/Driver/Driver.h b/include/clang/Driver/Driver.h
index d7bb1d2..15c194b 100644
--- a/include/clang/Driver/Driver.h
+++ b/include/clang/Driver/Driver.h
@@ -42,7 +42,7 @@ namespace driver {
   class Command;
   class Compilation;
   class InputInfo;
-  class Job;
+  class JobList;
   class JobAction;
   class SanitizerArgs;
   class ToolChain;
@@ -195,7 +195,7 @@ private:
                            llvm::opt::Arg **FinalPhaseArg = nullptr) const;
 
   // Before executing jobs, sets up response files for commands that need them.
-  void setUpResponseFiles(Compilation &C, Job &J);
+  void setUpResponseFiles(Compilation &C, Command &Cmd);
 
   void generatePrefixedToolNames(const char *Tool, const ToolChain &TC,
                                  SmallVectorImpl<std::string> &Names) const;
@@ -262,7 +262,7 @@ public:
 
   /// ParseArgStrings - Parse the given list of strings into an
   /// ArgList.
-  llvm::opt::InputArgList *ParseArgStrings(ArrayRef<const char *> Args);
+  llvm::opt::InputArgList ParseArgStrings(ArrayRef<const char *> Args);
 
   /// BuildInputs - Construct the list of inputs and their types from 
   /// the given arguments.
diff --git a/include/clang/Driver/Job.h b/include/clang/Driver/Job.h
index bc7e3ec..8fc2e8d 100644
--- a/include/clang/Driver/Job.h
+++ b/include/clang/Driver/Job.h
@@ -37,37 +37,9 @@ struct CrashReportInfo {
       : Filename(Filename), VFSPath(VFSPath) {}
 };
 
-class Job {
-public:
-  enum JobClass {
-    CommandClass,
-    FallbackCommandClass,
-    JobListClass
-  };
-
-private:
-  JobClass Kind;
-
-protected:
-  Job(JobClass _Kind) : Kind(_Kind) {}
-public:
-  virtual ~Job();
-
-  JobClass getKind() const { return Kind; }
-
-  /// Print - Print this Job in -### format.
-  ///
-  /// \param OS - The stream to print on.
-  /// \param Terminator - A string to print at the end of the line.
-  /// \param Quote - Should separate arguments be quoted.
-  /// \param CrashInfo - Details for inclusion in a crash report.
-  virtual void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote,
-                     CrashReportInfo *CrashInfo = nullptr) const = 0;
-};
-
 /// Command - An executable path/name and argument vector to
 /// execute.
-class Command : public Job {
+class Command {
   /// Source - The action which caused the creation of this job.
   const Action &Source;
 
@@ -106,11 +78,12 @@ class Command : public Job {
   void writeResponseFile(raw_ostream &OS) const;
 
 public:
-  Command(const Action &_Source, const Tool &_Creator, const char *_Executable,
-          const llvm::opt::ArgStringList &_Arguments);
+  Command(const Action &Source, const Tool &Creator, const char *Executable,
+          const llvm::opt::ArgStringList &Arguments);
+  virtual ~Command() {}
 
-  void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote,
-             CrashReportInfo *CrashInfo = nullptr) const override;
+  virtual void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote,
+                     CrashReportInfo *CrashInfo = nullptr) const;
 
   virtual int Execute(const StringRef **Redirects, std::string *ErrMsg,
                       bool *ExecutionFailed) const;
@@ -133,11 +106,6 @@ public:
   const char *getExecutable() const { return Executable; }
 
   const llvm::opt::ArgStringList &getArguments() const { return Arguments; }
-
-  static bool classof(const Job *J) {
-    return J->getKind() == CommandClass ||
-           J->getKind() == FallbackCommandClass;
-  }
 };
 
 /// Like Command, but with a fallback which is executed in case
@@ -154,18 +122,14 @@ public:
   int Execute(const StringRef **Redirects, std::string *ErrMsg,
               bool *ExecutionFailed) const override;
 
-  static bool classof(const Job *J) {
-    return J->getKind() == FallbackCommandClass;
-  }
-
 private:
   std::unique_ptr<Command> Fallback;
 };
 
 /// JobList - A sequence of jobs to perform.
-class JobList : public Job {
+class JobList {
 public:
-  typedef SmallVector<std::unique_ptr<Job>, 4> list_type;
+  typedef SmallVector<std::unique_ptr<Command>, 4> list_type;
   typedef list_type::size_type size_type;
   typedef llvm::pointee_iterator<list_type::iterator> iterator;
   typedef llvm::pointee_iterator<list_type::const_iterator> const_iterator;
@@ -174,14 +138,11 @@ private:
   list_type Jobs;
 
 public:
-  JobList();
-  ~JobList() override {}
-
   void Print(llvm::raw_ostream &OS, const char *Terminator,
-             bool Quote, CrashReportInfo *CrashInfo = nullptr) const override;
+             bool Quote, CrashReportInfo *CrashInfo = nullptr) const;
 
   /// Add a job to the list (taking ownership).
-  void addJob(std::unique_ptr<Job> J) { Jobs.push_back(std::move(J)); }
+  void addJob(std::unique_ptr<Command> J) { Jobs.push_back(std::move(J)); }
 
   /// Clear the job list.
   void clear();
@@ -193,10 +154,6 @@ public:
   const_iterator begin() const { return Jobs.begin(); }
   iterator end() { return Jobs.end(); }
   const_iterator end() const { return Jobs.end(); }
-
-  static bool classof(const Job *J) {
-    return J->getKind() == JobListClass;
-  }
 };
 
 } // end namespace driver
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index aae3776..6a75b7c 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -1153,6 +1153,11 @@ def march_EQ : Joined<["-"], "march=">, Group<m_Group>;
 def masm_EQ : Joined<["-"], "masm=">, Group<m_Group>, Flags<[DriverOption]>;
 def mcmodel_EQ : Joined<["-"], "mcmodel=">, Group<m_Group>;
 def mconstant_cfstrings : Flag<["-"], "mconstant-cfstrings">, Group<clang_ignored_m_Group>;
+def mconsole : Joined<["-"], "mconsole">, Group<m_Group>, Flags<[DriverOption]>;
+def mwindows : Joined<["-"], "mwindows">, Group<m_Group>, Flags<[DriverOption]>;
+def mdll : Joined<["-"], "mdll">, Group<m_Group>, Flags<[DriverOption]>;
+def municode : Joined<["-"], "municode">, Group<m_Group>, Flags<[DriverOption]>;
+def mthreads : Joined<["-"], "mthreads">, Group<m_Group>, Flags<[DriverOption]>;
 def mcpu_EQ : Joined<["-"], "mcpu=">, Group<m_Group>;
 def mdynamic_no_pic : Joined<["-"], "mdynamic-no-pic">, Group<m_Group>;
 def mfix_and_continue : Flag<["-"], "mfix-and-continue">, Group<clang_ignored_m_Group>;
@@ -1170,6 +1175,7 @@ def miphoneos_version_min_EQ : Joined<["-"], "miphoneos-version-min=">, Group<m_
 def mios_version_min_EQ : Joined<["-"], "mios-version-min=">,
   Alias<miphoneos_version_min_EQ>, HelpText<"Set iOS deployment target">;
 def mios_simulator_version_min_EQ : Joined<["-"], "mios-simulator-version-min=">, Alias<miphoneos_version_min_EQ>;
+def miphonesimulator_version_min_EQ : Joined<["-"], "miphonesimulator-version-min=">, Alias<miphoneos_version_min_EQ>;
 def mkernel : Flag<["-"], "mkernel">, Group<m_Group>;
 def mlinker_version_EQ : Joined<["-"], "mlinker-version=">,
   Flags<[DriverOption]>;
@@ -1742,6 +1748,7 @@ def _warn_ : Joined<["--"], "warn-">, Alias<W_Joined>;
 def _write_dependencies : Flag<["--"], "write-dependencies">, Alias<MD>;
 def _write_user_dependencies : Flag<["--"], "write-user-dependencies">, Alias<MMD>;
 def _ : Joined<["--"], "">, Flags<[Unsupported]>;
+
 def mieee_rnd_near : Flag<["-"], "mieee-rnd-near">, Group<m_hexagon_Features_Group>;
 def mv1 : Flag<["-"], "mv1">, Group<m_hexagon_Features_Group>, Alias<march_EQ>,
           AliasArgs<["v1"]>;
diff --git a/include/clang/Driver/SanitizerArgs.h b/include/clang/Driver/SanitizerArgs.h
index ceba912..11bfd41 100644
--- a/include/clang/Driver/SanitizerArgs.h
+++ b/include/clang/Driver/SanitizerArgs.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_DRIVER_SANITIZERARGS_H
 
 #include "clang/Basic/Sanitizers.h"
+#include "clang/Driver/Types.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include <string>
@@ -54,8 +55,8 @@ class SanitizerArgs {
   bool requiresPIE() const;
   bool needsUnwindTables() const;
   bool linkCXXRuntimes() const { return LinkCXXRuntimes; }
-  void addArgs(const llvm::opt::ArgList &Args,
-               llvm::opt::ArgStringList &CmdArgs) const;
+  void addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
+               llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const;
 
  private:
   void clear();
diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h
index ad87a05..6f9523c 100644
--- a/include/clang/Format/Format.h
+++ b/include/clang/Format/Format.h
@@ -40,168 +40,50 @@ std::error_code make_error_code(ParseError e);
 /// \brief The \c FormatStyle is used to configure the formatting to follow
 /// specific guidelines.
 struct FormatStyle {
-  /// \brief Supported languages. When stored in a configuration file, specifies
-  /// the language, that the configuration targets. When passed to the
-  /// reformat() function, enables syntax features specific to the language.
-  enum LanguageKind {
-    /// Do not use.
-    LK_None,
-    /// Should be used for C, C++, ObjectiveC, ObjectiveC++.
-    LK_Cpp,
-    /// Should be used for Java.
-    LK_Java,
-    /// Should be used for JavaScript.
-    LK_JavaScript,
-    /// Should be used for Protocol Buffers
-    /// (https://developers.google.com/protocol-buffers/).
-    LK_Proto
-  };
-
-  /// \brief Language, this format style is targeted at.
-  LanguageKind Language;
-
-  /// \brief The column limit.
-  ///
-  /// A column limit of \c 0 means that there is no column limit. In this case,
-  /// clang-format will respect the input's line breaking decisions within
-  /// statements unless they contradict other rules.
-  unsigned ColumnLimit;
-
-  /// \brief The maximum number of consecutive empty lines to keep.
-  unsigned MaxEmptyLinesToKeep;
-
-  /// \brief If true, empty lines at the start of blocks are kept.
-  bool KeepEmptyLinesAtTheStartOfBlocks;
-
-  /// \brief The penalty for each line break introduced inside a comment.
-  unsigned PenaltyBreakComment;
-
-  /// \brief The penalty for each line break introduced inside a string literal.
-  unsigned PenaltyBreakString;
-
-  /// \brief The penalty for each character outside of the column limit.
-  unsigned PenaltyExcessCharacter;
-
-  /// \brief The penalty for breaking before the first \c <<.
-  unsigned PenaltyBreakFirstLessLess;
-
-  /// \brief The penalty for breaking a function call after "call(".
-  unsigned PenaltyBreakBeforeFirstCallParameter;
-
-  /// \brief The & and * alignment style.
-  enum PointerAlignmentStyle {
-    /// Align pointer to the left.
-    PAS_Left,
-    /// Align pointer to the right.
-    PAS_Right,
-    /// Align pointer in the middle.
-    PAS_Middle
-  };
-
-  /// Pointer and reference alignment style.
-  PointerAlignmentStyle PointerAlignment;
-
-  /// \brief If \c true, analyze the formatted file for the most common
-  /// alignment of & and *. \c PointerAlignment is then used only as fallback.
-  bool DerivePointerAlignment;
-
   /// \brief The extra indent or outdent of access modifiers, e.g. \c public:.
   int AccessModifierOffset;
 
-  /// \brief Supported language standards.
-  enum LanguageStandard {
-    /// Use C++03-compatible syntax.
-    LS_Cpp03,
-    /// Use features of C++11 (e.g. \c A<A<int>> instead of
-    /// <tt>A<A<int> ></tt>).
-    LS_Cpp11,
-    /// Automatic detection based on the input.
-    LS_Auto
-  };
-
-  /// \brief Format compatible with this standard, e.g. use
-  /// <tt>A<A<int> ></tt> instead of \c A<A<int>> for LS_Cpp03.
-  LanguageStandard Standard;
-
-  /// \brief Indent case labels one level from the switch statement.
+  /// \brief If \c true, horizontally aligns arguments after an open bracket.
   ///
-  /// When \c false, use the same indentation level as for the switch statement.
-  /// Switch statement body is always indented one level more than case labels.
-  bool IndentCaseLabels;
-
-  /// \brief Indent if a function definition or declaration is wrapped after the
-  /// type.
-  bool IndentWrappedFunctionNames;
-
-  /// \brief Different ways to indent namespace contents.
-  enum NamespaceIndentationKind {
-    /// Don't indent in namespaces.
-    NI_None,
-    /// Indent only in inner namespaces (nested in other namespaces).
-    NI_Inner,
-    /// Indent in all namespaces.
-    NI_All
-  };
-
-  /// \brief The indentation used for namespaces.
-  NamespaceIndentationKind NamespaceIndentation;
+  /// This applies to round brackets (parentheses), angle brackets and square
+  /// brackets. This will result in formattings like
+  /// \code
+  /// someLongFunction(argument1,
+  ///                  argument2);
+  /// \endcode
+  bool AlignAfterOpenBracket;
 
-  /// \brief The number of spaces before trailing line comments
-  /// (\c // - comments).
+  /// \brief If \c true, aligns consecutive assignments.
   ///
-  /// This does not affect trailing block comments (\c /**/ - comments) as those
-  /// commonly have different usage patterns and a number of special cases.
-  unsigned SpacesBeforeTrailingComments;
+  /// This will align the assignment operators of consecutive lines. This
+  /// will result in formattings like
+  /// \code
+  /// int aaaa = 12;
+  /// int b    = 23;
+  /// int ccc  = 23;
+  /// \endcode
+  bool AlignConsecutiveAssignments;
 
-  /// \brief If \c false, a function declaration's or function definition's
-  /// parameters will either all be on the same line or will have one line each.
-  bool BinPackParameters;
+  /// \brief If \c true, aligns escaped newlines as far left as possible.
+  /// Otherwise puts them into the right-most column.
+  bool AlignEscapedNewlinesLeft;
 
-  /// \brief If \c false, a function call's arguments will either be all on the
-  /// same line or will have one line each.
-  bool BinPackArguments;
+  /// \brief If \c true, horizontally align operands of binary and ternary
+  /// expressions.
+  bool AlignOperands;
 
-  /// \brief If \c true, clang-format detects whether function calls and
-  /// definitions are formatted with one parameter per line.
-  ///
-  /// Each call can be bin-packed, one-per-line or inconclusive. If it is
-  /// inconclusive, e.g. completely on one line, but a decision needs to be
-  /// made, clang-format analyzes whether there are other bin-packed cases in
-  /// the input file and act accordingly.
-  ///
-  /// NOTE: This is an experimental flag, that might go away or be renamed. Do
-  /// not use this in config files, etc. Use at your own risk.
-  bool ExperimentalAutoDetectBinPacking;
+  /// \brief If \c true, aligns trailing comments.
+  bool AlignTrailingComments;
 
   /// \brief Allow putting all parameters of a function declaration onto
   /// the next line even if \c BinPackParameters is \c false.
   bool AllowAllParametersOfDeclarationOnNextLine;
 
-  /// \brief Penalty for putting the return type of a function onto its own
-  /// line.
-  unsigned PenaltyReturnTypeOnItsOwnLine;
-
-  /// \brief If the constructor initializers don't fit on a line, put each
-  /// initializer on its own line.
-  bool ConstructorInitializerAllOnOneLineOrOnePerLine;
-
-  /// \brief Always break constructor initializers before commas and align
-  /// the commas with the colon.
-  bool BreakConstructorInitializersBeforeComma;
-
   /// \brief Allows contracting simple braced statements to a single line.
   ///
   /// E.g., this allows <tt>if (a) { return; }</tt> to be put on a single line.
   bool AllowShortBlocksOnASingleLine;
 
-  /// \brief If \c true, <tt>if (a) return;</tt> can be put on a single
-  /// line.
-  bool AllowShortIfStatementsOnASingleLine;
-
-  /// \brief If \c true, <tt>while (true) continue;</tt> can be put on a
-  /// single line.
-  bool AllowShortLoopsOnASingleLine;
-
   /// \brief If \c true, short case labels will be contracted to a single line.
   bool AllowShortCaseLabelsOnASingleLine;
 
@@ -222,69 +104,27 @@ struct FormatStyle {
   /// on a single line.
   ShortFunctionStyle AllowShortFunctionsOnASingleLine;
 
-  /// \brief Add a space after \c @property in Objective-C, i.e. use
-  /// <tt>\@property (readonly)</tt> instead of <tt>\@property(readonly)</tt>.
-  bool ObjCSpaceAfterProperty;
-
-  /// \brief Add a space in front of an Objective-C protocol list, i.e. use
-  /// <tt>Foo <Protocol></tt> instead of \c Foo<Protocol>.
-  bool ObjCSpaceBeforeProtocolList;
-
-  /// \brief If \c true, horizontally aligns arguments after an open bracket.
-  ///
-  /// This applies to round brackets (parentheses), angle brackets and square
-  /// brackets. This will result in formattings like
-  /// \code
-  /// someLongFunction(argument1,
-  ///                  argument2);
-  /// \endcode
-  bool AlignAfterOpenBracket;
-
-  /// \brief If \c true, horizontally align operands of binary and ternary
-  /// expressions.
-  bool AlignOperands;
-
-  /// \brief If \c true, aligns trailing comments.
-  bool AlignTrailingComments;
-
-  /// \brief If \c true, aligns consecutive assignments.
-  ///
-  /// This will align the assignment operators of consecutive lines. This
-  /// will result in formattings like
-  /// \code
-  /// int aaaa = 12;
-  /// int b    = 23;
-  /// int ccc  = 23;
-  /// \endcode
-  bool AlignConsecutiveAssignments;
-
-  /// \brief If \c true, aligns escaped newlines as far left as possible.
-  /// Otherwise puts them into the right-most column.
-  bool AlignEscapedNewlinesLeft;
-
-  /// \brief The number of columns to use for indentation.
-  unsigned IndentWidth;
-
-  /// \brief The number of columns used for tab stops.
-  unsigned TabWidth;
-
-  /// \brief The number of characters to use for indentation of constructor
-  /// initializer lists.
-  unsigned ConstructorInitializerIndentWidth;
+  /// \brief If \c true, <tt>if (a) return;</tt> can be put on a single
+  /// line.
+  bool AllowShortIfStatementsOnASingleLine;
 
-  /// \brief The number of characters to use for indentation of ObjC blocks.
-  unsigned ObjCBlockIndentWidth;
+  /// \brief If \c true, <tt>while (true) continue;</tt> can be put on a
+  /// single line.
+  bool AllowShortLoopsOnASingleLine;
 
-  /// \brief If \c true, always break after function definition return types.
-  ///
-  /// More truthfully called 'break before the identifier following the type
-  /// in a function definition'. PenaltyReturnTypeOnItsOwnLine becomes
-  /// irrelevant.
-  bool AlwaysBreakAfterDefinitionReturnType;
+  /// \brief Different ways to break after the function definition return type.
+  enum DefinitionReturnTypeBreakingStyle {
+    /// Break after return type automatically.
+    /// \c PenaltyReturnTypeOnItsOwnLine is taken into account.
+    DRTBS_None,
+    /// Always break after the return type.
+    DRTBS_All,
+    /// Always break after the return types of top level functions.
+    DRTBS_TopLevel,
+  };
 
-  /// \brief If \c true, always break after the <tt>template<...></tt> of a
-  /// template declaration.
-  bool AlwaysBreakTemplateDeclarations;
+  /// \brief The function definition return type breaking style to use.
+  DefinitionReturnTypeBreakingStyle AlwaysBreakAfterDefinitionReturnType;
 
   /// \brief If \c true, always break before multiline string literals.
   ///
@@ -294,19 +134,17 @@ struct FormatStyle {
   /// \c ContinuationIndentWidth spaces from the start of the line.
   bool AlwaysBreakBeforeMultilineStrings;
 
-  /// \brief Different ways to use tab in formatting.
-  enum UseTabStyle {
-    /// Never use tab.
-    UT_Never,
-    /// Use tabs only for indentation.
-    UT_ForIndentation,
-    /// Use tabs whenever we need to fill whitespace that spans at least from
-    /// one tab stop to the next one.
-    UT_Always
-  };
+  /// \brief If \c true, always break after the <tt>template<...></tt> of a
+  /// template declaration.
+  bool AlwaysBreakTemplateDeclarations;
 
-  /// \brief The way to use tab characters in the resulting file.
-  UseTabStyle UseTab;
+  /// \brief If \c false, a function call's arguments will either be all on the
+  /// same line or will have one line each.
+  bool BinPackArguments;
+
+  /// \brief If \c false, a function declaration's or function definition's
+  /// parameters will either all be on the same line or will have one line each.
+  bool BinPackParameters;
 
   /// \brief The style of breaking before or after binary operators.
   enum BinaryOperatorStyle {
@@ -321,9 +159,6 @@ struct FormatStyle {
   /// \brief The way to wrap binary operators.
   BinaryOperatorStyle BreakBeforeBinaryOperators;
 
-  /// \brief If \c true, ternary operators will be placed after line breaks.
-  bool BreakBeforeTernaryOperators;
-
   /// \brief Different ways to attach braces to their surrounding context.
   enum BraceBreakingStyle {
     /// Always attach braces to surrounding context.
@@ -341,8 +176,37 @@ struct FormatStyle {
     BS_GNU
   };
 
-  /// \brief The brace breaking style to use.
-  BraceBreakingStyle BreakBeforeBraces;
+  /// \brief The brace breaking style to use.
+  BraceBreakingStyle BreakBeforeBraces;
+
+  /// \brief If \c true, ternary operators will be placed after line breaks.
+  bool BreakBeforeTernaryOperators;
+
+  /// \brief Always break constructor initializers before commas and align
+  /// the commas with the colon.
+  bool BreakConstructorInitializersBeforeComma;
+
+  /// \brief The column limit.
+  ///
+  /// A column limit of \c 0 means that there is no column limit. In this case,
+  /// clang-format will respect the input's line breaking decisions within
+  /// statements unless they contradict other rules.
+  unsigned ColumnLimit;
+
+  /// \brief A regular expression that describes comments with special meaning,
+  /// which should not be split into lines or otherwise changed.
+  std::string CommentPragmas;
+
+  /// \brief If the constructor initializers don't fit on a line, put each
+  /// initializer on its own line.
+  bool ConstructorInitializerAllOnOneLineOrOnePerLine;
+
+  /// \brief The number of characters to use for indentation of constructor
+  /// initializer lists.
+  unsigned ConstructorInitializerIndentWidth;
+
+  /// \brief Indent width for line continuations.
+  unsigned ContinuationIndentWidth;
 
   /// \brief If \c true, format braced lists as best suited for C++11 braced
   /// lists.
@@ -359,29 +223,138 @@ struct FormatStyle {
   /// a zero-length name is assumed.
   bool Cpp11BracedListStyle;
 
-  /// \brief If \c true, spaces will be inserted after '(' and before ')'.
-  bool SpacesInParentheses;
+  /// \brief If \c true, analyze the formatted file for the most common
+  /// alignment of & and *. \c PointerAlignment is then used only as fallback.
+  bool DerivePointerAlignment;
 
-  /// \brief If \c true, spaces will be inserted after '<' and before '>' in
-  /// template argument lists
-  bool SpacesInAngles;
+  /// \brief Disables formatting completely.
+  bool DisableFormat;
 
-  /// \brief If \c true, spaces will be inserted after '[' and before ']'.
-  bool SpacesInSquareBrackets;
+  /// \brief If \c true, clang-format detects whether function calls and
+  /// definitions are formatted with one parameter per line.
+  ///
+  /// Each call can be bin-packed, one-per-line or inconclusive. If it is
+  /// inconclusive, e.g. completely on one line, but a decision needs to be
+  /// made, clang-format analyzes whether there are other bin-packed cases in
+  /// the input file and act accordingly.
+  ///
+  /// NOTE: This is an experimental flag, that might go away or be renamed. Do
+  /// not use this in config files, etc. Use at your own risk.
+  bool ExperimentalAutoDetectBinPacking;
 
-  /// \brief If \c true, spaces may be inserted into '()'.
-  bool SpaceInEmptyParentheses;
+  /// \brief A vector of macros that should be interpreted as foreach loops
+  /// instead of as function calls.
+  ///
+  /// These are expected to be macros of the form:
+  /// \code
+  /// FOREACH(<variable-declaration>, ...)
+  ///   <loop-body>
+  /// \endcode
+  ///
+  /// For example: BOOST_FOREACH.
+  std::vector<std::string> ForEachMacros;
 
-  /// \brief If \c true, spaces are inserted inside container literals (e.g.
-  /// ObjC and Javascript array and dict literals).
-  bool SpacesInContainerLiterals;
+  /// \brief Indent case labels one level from the switch statement.
+  ///
+  /// When \c false, use the same indentation level as for the switch statement.
+  /// Switch statement body is always indented one level more than case labels.
+  bool IndentCaseLabels;
 
-  /// \brief If \c true, spaces may be inserted into C style casts.
-  bool SpacesInCStyleCastParentheses;
+  /// \brief The number of columns to use for indentation.
+  unsigned IndentWidth;
+
+  /// \brief Indent if a function definition or declaration is wrapped after the
+  /// type.
+  bool IndentWrappedFunctionNames;
+
+  /// \brief If true, empty lines at the start of blocks are kept.
+  bool KeepEmptyLinesAtTheStartOfBlocks;
+
+  /// \brief Supported languages. When stored in a configuration file, specifies
+  /// the language, that the configuration targets. When passed to the
+  /// reformat() function, enables syntax features specific to the language.
+  enum LanguageKind {
+    /// Do not use.
+    LK_None,
+    /// Should be used for C, C++, ObjectiveC, ObjectiveC++.
+    LK_Cpp,
+    /// Should be used for Java.
+    LK_Java,
+    /// Should be used for JavaScript.
+    LK_JavaScript,
+    /// Should be used for Protocol Buffers
+    /// (https://developers.google.com/protocol-buffers/).
+    LK_Proto
+  };
+
+  /// \brief Language, this format style is targeted at.
+  LanguageKind Language;
+
+  /// \brief The maximum number of consecutive empty lines to keep.
+  unsigned MaxEmptyLinesToKeep;
+
+  /// \brief Different ways to indent namespace contents.
+  enum NamespaceIndentationKind {
+    /// Don't indent in namespaces.
+    NI_None,
+    /// Indent only in inner namespaces (nested in other namespaces).
+    NI_Inner,
+    /// Indent in all namespaces.
+    NI_All
+  };
+
+  /// \brief The indentation used for namespaces.
+  NamespaceIndentationKind NamespaceIndentation;
+
+  /// \brief The number of characters to use for indentation of ObjC blocks.
+  unsigned ObjCBlockIndentWidth;
+
+  /// \brief Add a space after \c @property in Objective-C, i.e. use
+  /// <tt>\@property (readonly)</tt> instead of <tt>\@property(readonly)</tt>.
+  bool ObjCSpaceAfterProperty;
+
+  /// \brief Add a space in front of an Objective-C protocol list, i.e. use
+  /// <tt>Foo <Protocol></tt> instead of \c Foo<Protocol>.
+  bool ObjCSpaceBeforeProtocolList;
+
+  /// \brief The penalty for breaking a function call after "call(".
+  unsigned PenaltyBreakBeforeFirstCallParameter;
+
+  /// \brief The penalty for each line break introduced inside a comment.
+  unsigned PenaltyBreakComment;
+
+  /// \brief The penalty for breaking before the first \c <<.
+  unsigned PenaltyBreakFirstLessLess;
+
+  /// \brief The penalty for each line break introduced inside a string literal.
+  unsigned PenaltyBreakString;
+
+  /// \brief The penalty for each character outside of the column limit.
+  unsigned PenaltyExcessCharacter;
+
+  /// \brief Penalty for putting the return type of a function onto its own
+  /// line.
+  unsigned PenaltyReturnTypeOnItsOwnLine;
+
+  /// \brief The & and * alignment style.
+  enum PointerAlignmentStyle {
+    /// Align pointer to the left.
+    PAS_Left,
+    /// Align pointer to the right.
+    PAS_Right,
+    /// Align pointer in the middle.
+    PAS_Middle
+  };
+
+  /// Pointer and reference alignment style.
+  PointerAlignmentStyle PointerAlignment;
 
   /// \brief If \c true, a space may be inserted after C style casts.
   bool SpaceAfterCStyleCast;
 
+  /// \brief If \c false, spaces will be removed before assignment operators.
+  bool SpaceBeforeAssignmentOperators;
+
   /// \brief Different ways to put a space before opening parentheses.
   enum SpaceBeforeParensOptions {
     /// Never put a space before opening parentheses.
@@ -399,97 +372,139 @@ struct FormatStyle {
   /// \brief Defines in which cases to put a space before opening parentheses.
   SpaceBeforeParensOptions SpaceBeforeParens;
 
-  /// \brief If \c false, spaces will be removed before assignment operators.
-  bool SpaceBeforeAssignmentOperators;
+  /// \brief If \c true, spaces may be inserted into '()'.
+  bool SpaceInEmptyParentheses;
 
-  /// \brief Indent width for line continuations.
-  unsigned ContinuationIndentWidth;
+  /// \brief The number of spaces before trailing line comments
+  /// (\c // - comments).
+  ///
+  /// This does not affect trailing block comments (\c /**/ - comments) as those
+  /// commonly have different usage patterns and a number of special cases.
+  unsigned SpacesBeforeTrailingComments;
 
-  /// \brief A regular expression that describes comments with special meaning,
-  /// which should not be split into lines or otherwise changed.
-  std::string CommentPragmas;
+  /// \brief If \c true, spaces will be inserted after '<' and before '>' in
+  /// template argument lists
+  bool SpacesInAngles;
 
-  /// \brief Disables formatting at all.
-  bool DisableFormat;
+  /// \brief If \c true, spaces are inserted inside container literals (e.g.
+  /// ObjC and Javascript array and dict literals).
+  bool SpacesInContainerLiterals;
 
-  /// \brief A vector of macros that should be interpreted as foreach loops
-  /// instead of as function calls.
-  ///
-  /// These are expected to be macros of the form:
-  /// \code
-  /// FOREACH(<variable-declaration>, ...)
-  ///   <loop-body>
-  /// \endcode
-  ///
-  /// For example: BOOST_FOREACH.
-  std::vector<std::string> ForEachMacros;
+  /// \brief If \c true, spaces may be inserted into C style casts.
+  bool SpacesInCStyleCastParentheses;
+
+  /// \brief If \c true, spaces will be inserted after '(' and before ')'.
+  bool SpacesInParentheses;
+
+  /// \brief If \c true, spaces will be inserted after '[' and before ']'.
+  bool SpacesInSquareBrackets;
+
+  /// \brief Supported language standards.
+  enum LanguageStandard {
+    /// Use C++03-compatible syntax.
+    LS_Cpp03,
+    /// Use features of C++11 (e.g. \c A<A<int>> instead of
+    /// <tt>A<A<int> ></tt>).
+    LS_Cpp11,
+    /// Automatic detection based on the input.
+    LS_Auto
+  };
+
+  /// \brief Format compatible with this standard, e.g. use
+  /// <tt>A<A<int> ></tt> instead of \c A<A<int>> for LS_Cpp03.
+  LanguageStandard Standard;
+
+  /// \brief The number of columns used for tab stops.
+  unsigned TabWidth;
+
+  /// \brief Different ways to use tab in formatting.
+  enum UseTabStyle {
+    /// Never use tab.
+    UT_Never,
+    /// Use tabs only for indentation.
+    UT_ForIndentation,
+    /// Use tabs whenever we need to fill whitespace that spans at least from
+    /// one tab stop to the next one.
+    UT_Always
+  };
+
+  /// \brief The way to use tab characters in the resulting file.
+  UseTabStyle UseTab;
 
   bool operator==(const FormatStyle &R) const {
     return AccessModifierOffset == R.AccessModifierOffset &&
            AlignAfterOpenBracket == R.AlignAfterOpenBracket &&
-           AlignOperands == R.AlignOperands &&
+           AlignConsecutiveAssignments == R.AlignConsecutiveAssignments &&
            AlignEscapedNewlinesLeft == R.AlignEscapedNewlinesLeft &&
+           AlignOperands == R.AlignOperands &&
            AlignTrailingComments == R.AlignTrailingComments &&
            AllowAllParametersOfDeclarationOnNextLine ==
                R.AllowAllParametersOfDeclarationOnNextLine &&
+           AllowShortBlocksOnASingleLine == R.AllowShortBlocksOnASingleLine &&
+           AllowShortCaseLabelsOnASingleLine ==
+               R.AllowShortCaseLabelsOnASingleLine &&
            AllowShortFunctionsOnASingleLine ==
                R.AllowShortFunctionsOnASingleLine &&
-           AllowShortBlocksOnASingleLine == R.AllowShortBlocksOnASingleLine &&
            AllowShortIfStatementsOnASingleLine ==
                R.AllowShortIfStatementsOnASingleLine &&
            AllowShortLoopsOnASingleLine == R.AllowShortLoopsOnASingleLine &&
            AlwaysBreakAfterDefinitionReturnType ==
                R.AlwaysBreakAfterDefinitionReturnType &&
-           AlwaysBreakTemplateDeclarations ==
-               R.AlwaysBreakTemplateDeclarations &&
            AlwaysBreakBeforeMultilineStrings ==
                R.AlwaysBreakBeforeMultilineStrings &&
-           BinPackParameters == R.BinPackParameters &&
+           AlwaysBreakTemplateDeclarations ==
+               R.AlwaysBreakTemplateDeclarations &&
            BinPackArguments == R.BinPackArguments &&
+           BinPackParameters == R.BinPackParameters &&
            BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators &&
-           BreakBeforeTernaryOperators == R.BreakBeforeTernaryOperators &&
            BreakBeforeBraces == R.BreakBeforeBraces &&
+           BreakBeforeTernaryOperators == R.BreakBeforeTernaryOperators &&
            BreakConstructorInitializersBeforeComma ==
                R.BreakConstructorInitializersBeforeComma &&
            ColumnLimit == R.ColumnLimit &&
+           CommentPragmas == R.CommentPragmas &&
            ConstructorInitializerAllOnOneLineOrOnePerLine ==
                R.ConstructorInitializerAllOnOneLineOrOnePerLine &&
            ConstructorInitializerIndentWidth ==
                R.ConstructorInitializerIndentWidth &&
+           ContinuationIndentWidth == R.ContinuationIndentWidth &&
+           Cpp11BracedListStyle == R.Cpp11BracedListStyle &&
            DerivePointerAlignment == R.DerivePointerAlignment &&
+           DisableFormat == R.DisableFormat &&
            ExperimentalAutoDetectBinPacking ==
                R.ExperimentalAutoDetectBinPacking &&
+           ForEachMacros == R.ForEachMacros &&
            IndentCaseLabels == R.IndentCaseLabels &&
-           IndentWrappedFunctionNames == R.IndentWrappedFunctionNames &&
            IndentWidth == R.IndentWidth && Language == R.Language &&
-           MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
+           IndentWrappedFunctionNames == R.IndentWrappedFunctionNames &&
            KeepEmptyLinesAtTheStartOfBlocks ==
                R.KeepEmptyLinesAtTheStartOfBlocks &&
+           MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep &&
            NamespaceIndentation == R.NamespaceIndentation &&
            ObjCBlockIndentWidth == R.ObjCBlockIndentWidth &&
            ObjCSpaceAfterProperty == R.ObjCSpaceAfterProperty &&
            ObjCSpaceBeforeProtocolList == R.ObjCSpaceBeforeProtocolList &&
+           PenaltyBreakBeforeFirstCallParameter ==
+               R.PenaltyBreakBeforeFirstCallParameter &&
            PenaltyBreakComment == R.PenaltyBreakComment &&
            PenaltyBreakFirstLessLess == R.PenaltyBreakFirstLessLess &&
            PenaltyBreakString == R.PenaltyBreakString &&
            PenaltyExcessCharacter == R.PenaltyExcessCharacter &&
            PenaltyReturnTypeOnItsOwnLine == R.PenaltyReturnTypeOnItsOwnLine &&
            PointerAlignment == R.PointerAlignment &&
+           SpaceAfterCStyleCast == R.SpaceAfterCStyleCast &&
+           SpaceBeforeAssignmentOperators == R.SpaceBeforeAssignmentOperators &&
+           SpaceBeforeParens == R.SpaceBeforeParens &&
+           SpaceInEmptyParentheses == R.SpaceInEmptyParentheses &&
            SpacesBeforeTrailingComments == R.SpacesBeforeTrailingComments &&
-           Cpp11BracedListStyle == R.Cpp11BracedListStyle &&
-           Standard == R.Standard && TabWidth == R.TabWidth &&
-           UseTab == R.UseTab && SpacesInParentheses == R.SpacesInParentheses &&
-           SpacesInSquareBrackets == R.SpacesInSquareBrackets &&
            SpacesInAngles == R.SpacesInAngles &&
-           SpaceInEmptyParentheses == R.SpaceInEmptyParentheses &&
            SpacesInContainerLiterals == R.SpacesInContainerLiterals &&
            SpacesInCStyleCastParentheses == R.SpacesInCStyleCastParentheses &&
-           SpaceAfterCStyleCast == R.SpaceAfterCStyleCast &&
-           SpaceBeforeParens == R.SpaceBeforeParens &&
-           SpaceBeforeAssignmentOperators == R.SpaceBeforeAssignmentOperators &&
-           ContinuationIndentWidth == R.ContinuationIndentWidth &&
-           CommentPragmas == R.CommentPragmas &&
-           ForEachMacros == R.ForEachMacros;
+           SpacesInParentheses == R.SpacesInParentheses &&
+           SpacesInSquareBrackets == R.SpacesInSquareBrackets &&
+           Standard == R.Standard &&
+           TabWidth == R.TabWidth &&
+           UseTab == R.UseTab;
   }
 };
 
diff --git a/include/clang/Lex/ExternalPreprocessorSource.h b/include/clang/Lex/ExternalPreprocessorSource.h
index 33e7a2d..adf8e71 100644
--- a/include/clang/Lex/ExternalPreprocessorSource.h
+++ b/include/clang/Lex/ExternalPreprocessorSource.h
@@ -23,7 +23,7 @@ class Module;
 /// information.
 ///
 /// This abstract class allows an external sources (such as the \c ASTReader) 
-/// to provide additional macro definitions.
+/// to provide additional preprocessing information.
 class ExternalPreprocessorSource {
 public:
   virtual ~ExternalPreprocessorSource();
@@ -34,6 +34,11 @@ public:
   /// \brief Update an out-of-date identifier.
   virtual void updateOutOfDateIdentifier(IdentifierInfo &II) = 0;
 
+  /// \brief Return the identifier associated with the given ID number.
+  ///
+  /// The ID 0 is associated with the NULL identifier.
+  virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0;
+
   /// \brief Map a module ID to a module.
   virtual Module *getModule(unsigned ModuleID) = 0;
 };
diff --git a/include/clang/Lex/HeaderSearch.h b/include/clang/Lex/HeaderSearch.h
index 0406c6d..4c13380 100644
--- a/include/clang/Lex/HeaderSearch.h
+++ b/include/clang/Lex/HeaderSearch.h
@@ -27,7 +27,7 @@
 namespace clang {
   
 class DiagnosticsEngine;  
-class ExternalIdentifierLookup;
+class ExternalPreprocessorSource;
 class FileEntry;
 class FileManager;
 class HeaderSearchOptions;
@@ -111,8 +111,9 @@ struct HeaderFileInfo {
 
   /// \brief Retrieve the controlling macro for this header file, if
   /// any.
-  const IdentifierInfo *getControllingMacro(ExternalIdentifierLookup *External);
-  
+  const IdentifierInfo *
+  getControllingMacro(ExternalPreprocessorSource *External);
+
   /// \brief Determine whether this is a non-default header file info, e.g.,
   /// it corresponds to an actual header we've included or tried to include.
   bool isNonDefault() const {
@@ -242,8 +243,9 @@ class HeaderSearch {
   llvm::StringSet<llvm::BumpPtrAllocator> FrameworkNames;
   
   /// \brief Entity used to resolve the identifier IDs of controlling
-  /// macros into IdentifierInfo pointers, as needed.
-  ExternalIdentifierLookup *ExternalLookup;
+  /// macros into IdentifierInfo pointers, and keep the identifire up to date,
+  /// as needed.
+  ExternalPreprocessorSource *ExternalLookup;
 
   /// \brief Entity used to look up stored header file information.
   ExternalHeaderFileInfoSource *ExternalSource;
@@ -345,11 +347,11 @@ public:
     FileInfo.clear();
   }
 
-  void SetExternalLookup(ExternalIdentifierLookup *EIL) {
-    ExternalLookup = EIL;
+  void SetExternalLookup(ExternalPreprocessorSource *EPS) {
+    ExternalLookup = EPS;
   }
 
-  ExternalIdentifierLookup *getExternalLookup() const {
+  ExternalPreprocessorSource *getExternalLookup() const {
     return ExternalLookup;
   }
   
@@ -421,7 +423,7 @@ public:
   /// \return false if \#including the file will have no effect or true
   /// if we should include it.
   bool ShouldEnterIncludeFile(Preprocessor &PP, const FileEntry *File,
-                              bool isImport);
+                              bool isImport, Module *CorrespondingModule);
 
   /// \brief Return whether the specified file is a normal header,
   /// a system header, or a C++ friendly system header.
diff --git a/include/clang/Lex/ModuleMap.h b/include/clang/Lex/ModuleMap.h
index 0bbcfac..2b18a7d 100644
--- a/include/clang/Lex/ModuleMap.h
+++ b/include/clang/Lex/ModuleMap.h
@@ -231,8 +231,7 @@ private:
     return static_cast<bool>(findHeaderInUmbrellaDirs(File, IntermediateDirs));
   }
 
-  Module *inferFrameworkModule(StringRef ModuleName,
-                               const DirectoryEntry *FrameworkDir,
+  Module *inferFrameworkModule(const DirectoryEntry *FrameworkDir,
                                Attributes Attrs, Module *Parent);
 
 public:
@@ -344,10 +343,9 @@ public:
 
   /// \brief Infer the contents of a framework module map from the given
   /// framework directory.
-  Module *inferFrameworkModule(StringRef ModuleName, 
-                               const DirectoryEntry *FrameworkDir,
+  Module *inferFrameworkModule(const DirectoryEntry *FrameworkDir,
                                bool IsSystem, Module *Parent);
-  
+
   /// \brief Retrieve the module map file containing the definition of the given
   /// module.
   ///
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 439a280..bba0c38 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -394,7 +394,9 @@ class Preprocessor : public RefCountedBase<Preprocessor> {
                                    const IdentifierInfo *II) const {
       // FIXME: Find a spare bit on IdentifierInfo and store a
       //        HasModuleMacros flag.
-      if (!II->hasMacroDefinition() || !PP.getLangOpts().Modules ||
+      if (!II->hasMacroDefinition() ||
+          (!PP.getLangOpts().Modules &&
+           !PP.getLangOpts().ModulesLocalVisibility) ||
           !PP.CurSubmoduleState->VisibleModules.getGeneration())
         return nullptr;
 
@@ -454,7 +456,9 @@ class Preprocessor : public RefCountedBase<Preprocessor> {
     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
                                                SourceManager &SourceMgr) const {
       // FIXME: Incorporate module macros into the result of this.
-      return getLatest()->findDirectiveAtLoc(Loc, SourceMgr);
+      if (auto *Latest = getLatest())
+        return Latest->findDirectiveAtLoc(Loc, SourceMgr);
+      return MacroDirective::DefInfo();
     }
 
     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h
index 7042a1e..97a0aa4 100644
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -303,7 +303,7 @@ public:
     return true;
   }
 
-  /// Retrieve the underscored keyword (__nonnull, __nullable) that corresponds
+  /// Retrieve the underscored keyword (_Nonnull, _Nullable) that corresponds
   /// to the given nullability kind.
   IdentifierInfo *getNullabilityKeyword(NullabilityKind nullability) {
     return Actions.getNullabilityKeyword(nullability);
@@ -1182,7 +1182,7 @@ private:
                                 ParsingDeclarator &D,
                                 const ParsedTemplateInfo &TemplateInfo,
                                 const VirtSpecifiers& VS,
-                                ExprResult& Init);
+                                SourceLocation PureSpecLoc);
   void ParseCXXNonStaticMemberInitializer(Decl *VarD);
   void ParseLexedAttributes(ParsingClass &Class);
   void ParseLexedAttributeList(LateParsedAttrList &LAs, Decl *D,
@@ -1331,6 +1331,7 @@ public:
 
   ExprResult ParseExpression(TypeCastState isTypeCast = NotTypeCast);
   ExprResult ParseConstantExpression(TypeCastState isTypeCast = NotTypeCast);
+  ExprResult ParseConstraintExpression();
   // Expr that doesn't include commas.
   ExprResult ParseAssignmentExpression(TypeCastState isTypeCast = NotTypeCast);
 
@@ -1704,6 +1705,7 @@ private:
     DSC_top_level, // top-level/namespace declaration context
     DSC_template_type_arg, // template type argument context
     DSC_objc_method_result, // ObjC method result context, enables 'instancetype'
+    DSC_condition // condition declaration context
   };
 
   /// Is this a context in which we are parsing just a type-specifier (or
@@ -1714,6 +1716,7 @@ private:
     case DSC_class:
     case DSC_top_level:
     case DSC_objc_method_result:
+    case DSC_condition:
       return false;
 
     case DSC_template_type_arg:
diff --git a/include/clang/Sema/DeclSpec.h b/include/clang/Sema/DeclSpec.h
index 2ec3286..d375ec3 100644
--- a/include/clang/Sema/DeclSpec.h
+++ b/include/clang/Sema/DeclSpec.h
@@ -358,6 +358,9 @@ private:
   // constexpr-specifier
   unsigned Constexpr_specified : 1;
 
+  // concept-specifier
+  unsigned Concept_specified : 1;
+
   union {
     UnionParsedType TypeRep;
     Decl *DeclRep;
@@ -393,7 +396,7 @@ private:
   SourceLocation TQ_constLoc, TQ_restrictLoc, TQ_volatileLoc, TQ_atomicLoc;
   SourceLocation FS_inlineLoc, FS_virtualLoc, FS_explicitLoc, FS_noreturnLoc;
   SourceLocation FS_forceinlineLoc;
-  SourceLocation FriendLoc, ModulePrivateLoc, ConstexprLoc;
+  SourceLocation FriendLoc, ModulePrivateLoc, ConstexprLoc, ConceptLoc;
 
   WrittenBuiltinSpecs writtenBS;
   void SaveWrittenBuiltinSpecs();
@@ -437,6 +440,7 @@ public:
       FS_noreturn_specified(false),
       Friend_specified(false),
       Constexpr_specified(false),
+      Concept_specified(false),
       Attrs(attrFactory),
       ProtocolQualifiers(nullptr),
       NumProtocolQualifiers(0),
@@ -688,6 +692,8 @@ public:
                             unsigned &DiagID);
   bool SetConstexprSpec(SourceLocation Loc, const char *&PrevSpec,
                         unsigned &DiagID);
+  bool SetConceptSpec(SourceLocation Loc, const char *&PrevSpec,
+                      unsigned &DiagID);
 
   bool isFriendSpecified() const { return Friend_specified; }
   SourceLocation getFriendSpecLoc() const { return FriendLoc; }
@@ -698,11 +704,19 @@ public:
   bool isConstexprSpecified() const { return Constexpr_specified; }
   SourceLocation getConstexprSpecLoc() const { return ConstexprLoc; }
 
+  bool isConceptSpecified() const { return Concept_specified; }
+  SourceLocation getConceptSpecLoc() const { return ConceptLoc; }
+
   void ClearConstexprSpec() {
     Constexpr_specified = false;
     ConstexprLoc = SourceLocation();
   }
 
+  void ClearConceptSpec() {
+    Concept_specified = false;
+    ConceptLoc = SourceLocation();
+  }
+
   AttributePool &getAttributePool() const {
     return Attrs.getPool();
   }
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index cb75b96..72a0e0b 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -277,7 +277,9 @@ class Sema {
     // it will keep having external linkage. If it has internal linkage, we
     // will not link it. Since it has no previous decls, it will remain
     // with internal linkage.
-    return isVisible(Old) || New->isExternallyVisible();
+    if (getLangOpts().ModulesHideInternalLinkage)
+      return isVisible(Old) || New->isExternallyVisible();
+    return true;
   }
 
 public:
@@ -700,9 +702,15 @@ public:
   /// \brief The declaration of the Objective-C NSNumber class.
   ObjCInterfaceDecl *NSNumberDecl;
 
+  /// \brief The declaration of the Objective-C NSValue class.
+  ObjCInterfaceDecl *NSValueDecl;
+
   /// \brief Pointer to NSNumber type (NSNumber *).
   QualType NSNumberPointer;
 
+  /// \brief Pointer to NSValue type (NSValue *).
+  QualType NSValuePointer;
+
   /// \brief The Objective-C NSNumber methods used to create NSNumber literals.
   ObjCMethodDecl *NSNumberLiteralMethods[NSAPI::NumNSNumberLiteralMethods];
 
@@ -715,6 +723,9 @@ public:
   /// \brief The declaration of the stringWithUTF8String: method.
   ObjCMethodDecl *StringWithUTF8StringMethod;
 
+  /// \brief The declaration of the valueWithBytes:objCType: method.
+  ObjCMethodDecl *ValueWithBytesObjCTypeMethod;
+
   /// \brief The declaration of the Objective-C NSArray class.
   ObjCInterfaceDecl *NSArrayDecl;
 
@@ -1679,6 +1690,7 @@ public:
                             bool TypeMayContainAuto);
   void ActOnUninitializedDecl(Decl *dcl, bool TypeMayContainAuto);
   void ActOnInitializerError(Decl *Dcl);
+  void ActOnPureSpecifier(Decl *D, SourceLocation PureSpecLoc);
   void ActOnCXXForRangeDecl(Decl *D);
   StmtResult ActOnCXXForRangeIdentifier(Scope *S, SourceLocation IdentLoc,
                                         IdentifierInfo *Ident,
@@ -2976,7 +2988,7 @@ public:
                                        bool SynthesizeProperties);
 
   /// Diagnose any null-resettable synthesized setters.
-  void diagnoseNullResettableSynthesizedSetters(ObjCImplDecl *impDecl);
+  void diagnoseNullResettableSynthesizedSetters(const ObjCImplDecl *impDecl);
 
   /// DefaultSynthesizeProperties - This routine default synthesizes all
   /// properties which must be synthesized in the class's \@implementation.
@@ -5025,9 +5037,9 @@ public:
 
   /// BuildObjCBoxedExpr - builds an ObjCBoxedExpr AST node for the
   /// '@' prefixed parenthesized expression. The type of the expression will
-  /// either be "NSNumber *" or "NSString *" depending on the type of
-  /// ValueType, which is allowed to be a built-in numeric type or
-  /// "char *" or "const char *".
+  /// either be "NSNumber *", "NSString *" or "NSValue *" depending on the type
+  /// of ValueType, which is allowed to be a built-in numeric type, "char *",
+  /// "const char *" or C structure with attribute 'objc_boxable'.
   ExprResult BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr);
 
   ExprResult BuildObjCSubscriptExpression(SourceLocation RB, Expr *BaseExpr,
@@ -7603,6 +7615,12 @@ private:
   bool IsOpenMPCapturedVar(VarDecl *VD);
 
 public:
+  /// \brief Check if the specified variable is used in one of the private
+  /// clauses in OpenMP constructs.
+  /// \param Level Relative level of nested OpenMP construct for that the check
+  /// is performed.
+  bool isOpenMPPrivateVar(VarDecl *VD, unsigned Level);
+
   ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
                                                     Expr *Op);
   /// \brief Called on start of new data sharing attribute block.
@@ -7610,9 +7628,9 @@ public:
                            const DeclarationNameInfo &DirName, Scope *CurScope,
                            SourceLocation Loc);
   /// \brief Start analysis of clauses.
-  void StartOpenMPClauses();
+  void StartOpenMPClause(OpenMPClauseKind K);
   /// \brief End analysis of clauses.
-  void EndOpenMPClauses();
+  void EndOpenMPClause();
   /// \brief Called on end of data sharing attribute block.
   void EndOpenMPDSABlock(Stmt *CurDirective);
 
@@ -7646,12 +7664,10 @@ public:
   ///
   /// \returns Statement for finished OpenMP region.
   StmtResult ActOnOpenMPRegionEnd(StmtResult S, ArrayRef<OMPClause *> Clauses);
-  StmtResult ActOnOpenMPExecutableDirective(OpenMPDirectiveKind Kind,
-                                            const DeclarationNameInfo &DirName,
-                                            ArrayRef<OMPClause *> Clauses,
-                                            Stmt *AStmt,
-                                            SourceLocation StartLoc,
-                                            SourceLocation EndLoc);
+  StmtResult ActOnOpenMPExecutableDirective(
+      OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName,
+      OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
+      Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc);
   /// \brief Called on well-formed '\#pragma omp parallel' after parsing
   /// of the  associated statement.
   StmtResult ActOnOpenMPParallelDirective(ArrayRef<OMPClause *> Clauses,
@@ -7757,6 +7773,15 @@ public:
   StmtResult ActOnOpenMPTeamsDirective(ArrayRef<OMPClause *> Clauses,
                                        Stmt *AStmt, SourceLocation StartLoc,
                                        SourceLocation EndLoc);
+  /// \brief Called on well-formed '\#pragma omp cancellation point'.
+  StmtResult
+  ActOnOpenMPCancellationPointDirective(SourceLocation StartLoc,
+                                        SourceLocation EndLoc,
+                                        OpenMPDirectiveKind CancelRegion);
+  /// \brief Called on well-formed '\#pragma omp cancel'.
+  StmtResult ActOnOpenMPCancelDirective(SourceLocation StartLoc,
+                                        SourceLocation EndLoc,
+                                        OpenMPDirectiveKind CancelRegion);
 
   OMPClause *ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
                                          Expr *Expr,
@@ -7851,13 +7876,13 @@ public:
   OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc,
                                      SourceLocation EndLoc);
 
-  OMPClause *
-  ActOnOpenMPVarListClause(OpenMPClauseKind Kind, ArrayRef<Expr *> Vars,
-                           Expr *TailExpr, SourceLocation StartLoc,
-                           SourceLocation LParenLoc, SourceLocation ColonLoc,
-                           SourceLocation EndLoc,
-                           CXXScopeSpec &ReductionIdScopeSpec,
-                           const DeclarationNameInfo &ReductionId);
+  OMPClause *ActOnOpenMPVarListClause(
+      OpenMPClauseKind Kind, ArrayRef<Expr *> Vars, Expr *TailExpr,
+      SourceLocation StartLoc, SourceLocation LParenLoc,
+      SourceLocation ColonLoc, SourceLocation EndLoc,
+      CXXScopeSpec &ReductionIdScopeSpec,
+      const DeclarationNameInfo &ReductionId, OpenMPDependClauseKind DepKind,
+      SourceLocation DepLoc);
   /// \brief Called on well-formed 'private' clause.
   OMPClause *ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
                                       SourceLocation StartLoc,
@@ -7914,6 +7939,12 @@ public:
                                     SourceLocation StartLoc,
                                     SourceLocation LParenLoc,
                                     SourceLocation EndLoc);
+  /// \brief Called on well-formed 'depend' clause.
+  OMPClause *
+  ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind, SourceLocation DepLoc,
+                          SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
+                          SourceLocation StartLoc, SourceLocation LParenLoc,
+                          SourceLocation EndLoc);
 
   /// \brief The kind of conversion being performed.
   enum CheckedConversionKind {
@@ -8711,6 +8742,7 @@ private:
   bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
                                 int ArgNum, unsigned ExpectedFieldNum,
                                 bool AllowName);
+  bool SemaBuiltinCpuSupports(CallExpr *TheCall);
 public:
   enum FormatStringType {
     FST_Scanf,
@@ -8840,9 +8872,9 @@ private:
   mutable IdentifierInfo *Ident___float128;
 
   /// Nullability type specifiers.
-  IdentifierInfo *Ident___nonnull = nullptr;
-  IdentifierInfo *Ident___nullable = nullptr;
-  IdentifierInfo *Ident___null_unspecified = nullptr;
+  IdentifierInfo *Ident__Nonnull = nullptr;
+  IdentifierInfo *Ident__Nullable = nullptr;
+  IdentifierInfo *Ident__Null_unspecified = nullptr;
 
   IdentifierInfo *Ident_NSError = nullptr;
 
diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h
index 83185a8..ee8e3f4 100644
--- a/include/clang/Serialization/ASTBitCodes.h
+++ b/include/clang/Serialization/ASTBitCodes.h
@@ -1397,6 +1397,8 @@ namespace clang {
       STMT_OMP_TARGET_DIRECTIVE,
       STMT_OMP_TEAMS_DIRECTIVE,
       STMT_OMP_TASKGROUP_DIRECTIVE,
+      STMT_OMP_CANCELLATION_POINT_DIRECTIVE,
+      STMT_OMP_CANCEL_DIRECTIVE,
 
       // ARC
       EXPR_OBJC_BRIDGED_CAST,     // ObjCBridgedCastExpr
diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h
index 429f00f..ef5b107 100644
--- a/include/clang/Serialization/ASTReader.h
+++ b/include/clang/Serialization/ASTReader.h
@@ -304,7 +304,6 @@ class ASTReader
     public ExternalHeaderFileInfoSource,
     public ExternalSemaSource,
     public IdentifierInfoLookup,
-    public ExternalIdentifierLookup,
     public ExternalSLocEntrySource
 {
 public:
@@ -1846,6 +1845,11 @@ public:
   /// Note: overrides method in ExternalASTSource
   Module *getModule(unsigned ID) override;
 
+  /// \brief Return a descriptor for the corresponding module.
+  llvm::Optional<ASTSourceDescriptor> getSourceDescriptor(unsigned ID) override;
+  /// \brief Return a descriptor for the module.
+  ASTSourceDescriptor getSourceDescriptor(const Module &M) override;
+
   /// \brief Retrieve a selector from the given module with its local ID
   /// number.
   Selector getLocalSelector(ModuleFile &M, unsigned LocalID);
diff --git a/include/clang/Serialization/ASTWriter.h b/include/clang/Serialization/ASTWriter.h
index decd07a..c966d3e 100644
--- a/include/clang/Serialization/ASTWriter.h
+++ b/include/clang/Serialization/ASTWriter.h
@@ -42,6 +42,7 @@ namespace llvm {
 namespace clang {
 
 class ASTContext;
+class Attr;
 class NestedNameSpecifier;
 class CXXBaseSpecifier;
 class CXXCtorInitializer;
@@ -60,6 +61,7 @@ class Module;
 class PreprocessedEntity;
 class PreprocessingRecord;
 class Preprocessor;
+class RecordDecl;
 class Sema;
 class SourceManager;
 struct StoredDeclsList;
@@ -302,6 +304,7 @@ private:
       unsigned Loc;
       unsigned Val;
       Module *Mod;
+      const Attr *Attribute;
     };
 
   public:
@@ -315,6 +318,8 @@ private:
         : Kind(Kind), Val(Val) {}
     DeclUpdate(unsigned Kind, Module *M)
           : Kind(Kind), Mod(M) {}
+    DeclUpdate(unsigned Kind, const Attr *Attribute)
+          : Kind(Kind), Attribute(Attribute) {}
 
     unsigned getKind() const { return Kind; }
     const Decl *getDecl() const { return Dcl; }
@@ -324,6 +329,7 @@ private:
     }
     unsigned getNumber() const { return Val; }
     Module *getModule() const { return Mod; }
+    const Attr *getAttr() const { return Attribute; }
   };
 
   typedef SmallVector<DeclUpdate, 1> UpdateRecord;
@@ -860,6 +866,8 @@ public:
   void DeclarationMarkedUsed(const Decl *D) override;
   void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override;
   void RedefinedHiddenDefinition(const NamedDecl *D, Module *M) override;
+  void AddedAttributeToRecord(const Attr *Attr,
+                              const RecordDecl *Record) override;
 };
 
 /// \brief AST and semantic-analysis consumer that generates a
diff --git a/include/clang/Serialization/Module.h b/include/clang/Serialization/Module.h
index 5571d91..c98ced4 100644
--- a/include/clang/Serialization/Module.h
+++ b/include/clang/Serialization/Module.h
@@ -20,6 +20,7 @@
 #include "clang/Serialization/ContinuousRangeMap.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Support/Endian.h"
 #include <memory>
 #include <string>
 
@@ -206,7 +207,7 @@ public:
   llvm::BitstreamCursor InputFilesCursor;
 
   /// \brief Offsets for all of the input file entries in the AST file.
-  const uint64_t *InputFileOffsets;
+  const llvm::support::unaligned_uint64_t *InputFileOffsets;
 
   /// \brief The input files that have been loaded from this AST file.
   std::vector<InputFile> InputFilesLoaded;
diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
index 308ac83..57c73fd 100644
--- a/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
+++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
@@ -464,7 +464,7 @@ public:
   /// The reports are usually generated by the checkers. Further, they are
   /// folded based on the profile value, which is done to coalesce similar
   /// reports.
-  void emitReport(BugReport *R);
+  void emitReport(std::unique_ptr<BugReport> R);
 
   void EmitBasicReport(const Decl *DeclWithIssue, const CheckerBase *Checker,
                        StringRef BugName, StringRef BugCategory,
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
index 68274f5..a4ff133 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
@@ -232,9 +232,9 @@ public:
   }
 
   /// \brief Emit the diagnostics report.
-  void emitReport(BugReport *R) {
+  void emitReport(std::unique_ptr<BugReport> R) {
     Changed = true;
-    Eng.getBugReporter().emitReport(R);
+    Eng.getBugReporter().emitReport(std::move(R));
   }
 
   /// \brief Get the declaration of the called function (path-sensitive).
diff --git a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h
index 6a42df2..e7ec1f4 100644
--- a/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h
+++ b/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h
@@ -28,10 +28,9 @@ template <class T> bool containsStmt(const Stmt *S) {
   if (isa<T>(S))
       return true;
 
-  for (Stmt::const_child_range I = S->children(); I; ++I)
-    if (const Stmt *child = *I)
-      if (containsStmt<T>(child))
-        return true;
+  for (const Stmt *Child : S->children())
+    if (Child && containsStmt<T>(Child))
+      return true;
 
   return false;
 }
diff --git a/lib/ARCMigrate/ObjCMT.cpp b/lib/ARCMigrate/ObjCMT.cpp
index 8c2e0f4..b61a421 100644
--- a/lib/ARCMigrate/ObjCMT.cpp
+++ b/lib/ARCMigrate/ObjCMT.cpp
@@ -355,8 +355,8 @@ public:
   bool TraverseObjCMessageExpr(ObjCMessageExpr *E) {
     // Do depth first; we want to rewrite the subexpressions first so that if
     // we have to move expressions we will move them already rewritten.
-    for (Stmt::child_range range = E->children(); range; ++range)
-      if (!TraverseStmt(*range))
+    for (Stmt *SubStmt : E->children())
+      if (!TraverseStmt(SubStmt))
         return false;
 
     return WalkUpFromObjCMessageExpr(E);
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 049eebd..5a91f07 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -1786,6 +1786,17 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   return TypeInfo(Width, Align, AlignIsRequired);
 }
 
+unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
+  unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
+  // Target ppc64 with QPX: simd default alignment for pointer to double is 32.
+  if ((getTargetInfo().getTriple().getArch() == llvm::Triple::ppc64 ||
+       getTargetInfo().getTriple().getArch() == llvm::Triple::ppc64le) &&
+      getTargetInfo().getABI() == "elfv1-qpx" &&
+      T->isSpecificBuiltinType(BuiltinType::Double))
+    SimdAlign = 256;
+  return SimdAlign;
+}
+
 /// toCharUnitsFromBits - Convert a size in bits to a size in characters.
 CharUnits ASTContext::toCharUnitsFromBits(int64_t BitSize) const {
   return CharUnits::fromQuantity(BitSize / getCharWidth());
@@ -1866,6 +1877,16 @@ CharUnits ASTContext::getAlignOfGlobalVarInChars(QualType T) const {
   return toCharUnitsFromBits(getAlignOfGlobalVar(T));
 }
 
+CharUnits ASTContext::getOffsetOfBaseWithVBPtr(const CXXRecordDecl *RD) const {
+  CharUnits Offset = CharUnits::Zero();
+  const ASTRecordLayout *Layout = &getASTRecordLayout(RD);
+  while (const CXXRecordDecl *Base = Layout->getBaseSharingVBPtr()) {
+    Offset += Layout->getBaseClassOffset(Base);
+    Layout = &getASTRecordLayout(Base);
+  }
+  return Offset;
+}
+
 /// DeepCollectObjCIvars -
 /// This routine first collects all declared, but not synthesized, ivars in
 /// super class and then collects all ivars, including those synthesized for
diff --git a/lib/AST/ASTDumper.cpp b/lib/AST/ASTDumper.cpp
index 60cbb06..90da416 100644
--- a/lib/AST/ASTDumper.cpp
+++ b/lib/AST/ASTDumper.cpp
@@ -981,6 +981,10 @@ void ASTDumper::dumpDecl(const Decl *D) {
       OS << " in " << M->getFullModuleName();
     else if (Module *M = D->getLocalOwningModule())
       OS << " in (local) " << M->getFullModuleName();
+    if (auto *ND = dyn_cast<NamedDecl>(D))
+      for (Module *M : D->getASTContext().getModulesWithMergedDefinition(
+               const_cast<NamedDecl *>(ND)))
+        dumpChild([=] { OS << "also in " << M->getFullModuleName(); });
     if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
       if (ND->isHidden())
         OS << " hidden";
@@ -1595,8 +1599,8 @@ void ASTDumper::dumpStmt(const Stmt *S) {
 
     ConstStmtVisitor<ASTDumper>::Visit(S);
 
-    for (Stmt::const_child_range CI = S->children(); CI; ++CI)
-      dumpStmt(*CI);
+    for (const Stmt *SubStmt : S->children())
+      dumpStmt(SubStmt);
   });
 }
 
@@ -1825,6 +1829,9 @@ void ASTDumper::VisitUnaryExprOrTypeTraitExpr(
   case UETT_VecStep:
     OS << " vec_step";
     break;
+  case UETT_OpenMPRequiredSimdAlign:
+    OS << " __builtin_omp_required_simd_align";
+    break;
   }
   if (Node->isArgumentType())
     dumpType(Node->getArgumentType());
diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp
index 70bd16f..d20451d 100644
--- a/lib/AST/DeclBase.cpp
+++ b/lib/AST/DeclBase.cpp
@@ -236,6 +236,7 @@ void Decl::setLexicalDeclContext(DeclContext *DC) {
   } else {
     getMultipleDC()->LexicalDC = DC;
   }
+  Hidden = cast<Decl>(DC)->Hidden;
 }
 
 void Decl::setDeclContextsImpl(DeclContext *SemaDC, DeclContext *LexicalDC,
diff --git a/lib/AST/DeclPrinter.cpp b/lib/AST/DeclPrinter.cpp
index c3ce476..d33093b 100644
--- a/lib/AST/DeclPrinter.cpp
+++ b/lib/AST/DeclPrinter.cpp
@@ -954,9 +954,8 @@ void DeclPrinter::PrintObjCMethodType(ASTContext &Ctx,
   if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_Oneway)
     Out << "oneway ";
   if (Quals & Decl::ObjCDeclQualifier::OBJC_TQ_CSNullability) {
-    if (auto nullability = AttributedType::stripOuterNullability(T)) {
-      Out << getNullabilitySpelling(*nullability).substr(2) << ' ';
-    }
+    if (auto nullability = AttributedType::stripOuterNullability(T))
+      Out << getNullabilitySpelling(*nullability, true) << ' ';
   }
   
   Out << Ctx.getUnqualifiedObjCPointerType(T).getAsString(Policy);
@@ -1207,7 +1206,7 @@ void DeclPrinter::VisitObjCPropertyDecl(ObjCPropertyDecl *PDecl) {
           Out << (first ? ' ' : ',') << "null_resettable";
         } else {
           Out << (first ? ' ' : ',')
-              << getNullabilitySpelling(*nullability).substr(2);
+              << getNullabilitySpelling(*nullability, true);
         }
         first = false;
       }
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 36f4139..87f9ffb 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -3154,10 +3154,10 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
   }
 
   // Recurse to children.
-  for (const_child_range SubStmts = children(); SubStmts; ++SubStmts)
-    if (const Stmt *S = *SubStmts)
-      if (cast<Expr>(S)->HasSideEffects(Ctx, IncludePossibleEffects))
-        return true;
+  for (const Stmt *SubStmt : children())
+    if (SubStmt &&
+        cast<Expr>(SubStmt)->HasSideEffects(Ctx, IncludePossibleEffects))
+      return true;
 
   return false;
 }
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index 8e472f1..ed749cc 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -7251,6 +7251,13 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
       return false;
     return Success(Sizeof, E);
   }
+  case UETT_OpenMPRequiredSimdAlign:
+    assert(E->isArgumentType());
+    return Success(
+        Info.Ctx.toCharUnitsFromBits(
+                    Info.Ctx.getOpenMPDefaultSimdAlign(E->getArgumentType()))
+            .getQuantity(),
+        E);
   }
 
   llvm_unreachable("unknown expr/type trait");
diff --git a/lib/AST/ExternalASTSource.cpp b/lib/AST/ExternalASTSource.cpp
index 730842a..1c82c35 100644
--- a/lib/AST/ExternalASTSource.cpp
+++ b/lib/AST/ExternalASTSource.cpp
@@ -22,6 +22,16 @@ using namespace clang;
 
 ExternalASTSource::~ExternalASTSource() { }
 
+llvm::Optional<ExternalASTSource::ASTSourceDescriptor>
+ExternalASTSource::getSourceDescriptor(unsigned ID) {
+  return None;
+}
+
+ExternalASTSource::ASTSourceDescriptor
+ExternalASTSource::getSourceDescriptor(const Module &M) {
+  return ASTSourceDescriptor();
+}
+
 void ExternalASTSource::FindFileRegionDecls(FileID File, unsigned Offset,
                                             unsigned Length,
                                             SmallVectorImpl<Decl *> &Decls) {}
diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp
index e5a31f8..0134c09 100644
--- a/lib/AST/ItaniumMangle.cpp
+++ b/lib/AST/ItaniumMangle.cpp
@@ -3018,13 +3018,21 @@ recurse:
     case UETT_AlignOf:
       Out << 'a';
       break;
-    case UETT_VecStep:
+    case UETT_VecStep: {
       DiagnosticsEngine &Diags = Context.getDiags();
       unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
                                      "cannot yet mangle vec_step expression");
       Diags.Report(DiagID);
       return;
     }
+    case UETT_OpenMPRequiredSimdAlign:
+      DiagnosticsEngine &Diags = Context.getDiags();
+      unsigned DiagID = Diags.getCustomDiagID(
+          DiagnosticsEngine::Error,
+          "cannot yet mangle __builtin_omp_required_simd_align expression");
+      Diags.Report(DiagID);
+      return;
+    }
     if (SAE->isArgumentType()) {
       Out << 't';
       mangleType(SAE->getArgumentType());
diff --git a/lib/AST/MicrosoftMangle.cpp b/lib/AST/MicrosoftMangle.cpp
index 29a95a5..48a8fa5 100644
--- a/lib/AST/MicrosoftMangle.cpp
+++ b/lib/AST/MicrosoftMangle.cpp
@@ -115,6 +115,9 @@ public:
   void mangleCXXVBTable(const CXXRecordDecl *Derived,
                         ArrayRef<const CXXRecordDecl *> BasePath,
                         raw_ostream &Out) override;
+  void mangleCXXVirtualDisplacementMap(const CXXRecordDecl *SrcRD,
+                                       const CXXRecordDecl *DstRD,
+                                       raw_ostream &Out) override;
   void mangleCXXThrowInfo(QualType T, bool IsConst, bool IsVolatile,
                           uint32_t NumEntries, raw_ostream &Out) override;
   void mangleCXXCatchableTypeArray(QualType T, uint32_t NumEntries,
@@ -499,6 +502,9 @@ void MicrosoftCXXNameMangler::mangleMemberDataPointer(const CXXRecordDecl *RD,
     FieldOffset /= getASTContext().getCharWidth();
 
     VBTableOffset = 0;
+
+    if (IM == MSInheritanceAttr::Keyword_virtual_inheritance)
+      FieldOffset -= getASTContext().getOffsetOfBaseWithVBPtr(RD).getQuantity();
   } else {
     FieldOffset = RD->nullFieldOffsetIsZero() ? 0 : -1;
 
@@ -567,6 +573,10 @@ MicrosoftCXXNameMangler::mangleMemberFunctionPointer(const CXXRecordDecl *RD,
       mangleName(MD);
       mangleFunctionEncoding(MD, /*ShouldMangle=*/true);
     }
+
+    if (VBTableOffset == 0 &&
+        IM == MSInheritanceAttr::Keyword_virtual_inheritance)
+      NVOffset -= getASTContext().getOffsetOfBaseWithVBPtr(RD).getQuantity();
   } else {
     // Null single inheritance member functions are encoded as a simple nullptr.
     if (IM == MSInheritanceAttr::Keyword_single_inheritance) {
@@ -579,7 +589,7 @@ MicrosoftCXXNameMangler::mangleMemberFunctionPointer(const CXXRecordDecl *RD,
   }
 
   if (MSInheritanceAttr::hasNVOffsetField(/*IsMemberFunction=*/true, IM))
-    mangleNumber(NVOffset);
+    mangleNumber(static_cast<uint32_t>(NVOffset));
   if (MSInheritanceAttr::hasVBPtrOffsetField(IM))
     mangleNumber(VBPtrOffset);
   if (MSInheritanceAttr::hasVBTableOffsetField(IM))
@@ -1205,11 +1215,23 @@ void MicrosoftCXXNameMangler::mangleTemplateArg(const TemplateDecl *TD,
         return;
       }
       if (MPT->isMemberDataPointer()) {
-        mangleMemberDataPointer(RD, nullptr);
-        return;
+        if (isa<ClassTemplateDecl>(TD)) {
+          mangleMemberDataPointer(RD, nullptr);
+          return;
+        }
+        // nullptr data pointers are always represented with a single field
+        // which is initialized with either 0 or -1.  Why -1?  Well, we need to
+        // distinguish the case where the data member is at offset zero in the
+        // record.
+        // However, we are free to use 0 *if* we would use multiple fields for
+        // non-nullptr member pointers.
+        if (!RD->nullFieldOffsetIsZero()) {
+          mangleIntegerLiteral(llvm::APSInt::get(-1), /*IsBoolean=*/false);
+          return;
+        }
       }
     }
-    Out << "$0A@";
+    mangleIntegerLiteral(llvm::APSInt::getUnsigned(0), /*IsBoolean=*/false);
     break;
   }
   case TemplateArgument::Expression:
@@ -2395,6 +2417,15 @@ void MicrosoftMangleContextImpl::mangleCXXCatchHandlerType(QualType T,
   Mangler.getStream() << '.' << Flags;
 }
 
+void MicrosoftMangleContextImpl::mangleCXXVirtualDisplacementMap(
+    const CXXRecordDecl *SrcRD, const CXXRecordDecl *DstRD, raw_ostream &Out) {
+  MicrosoftCXXNameMangler Mangler(*this, Out);
+  Mangler.getStream() << "\01??_K";
+  Mangler.mangleName(SrcRD);
+  Mangler.getStream() << "$C";
+  Mangler.mangleName(DstRD);
+}
+
 void MicrosoftMangleContextImpl::mangleCXXThrowInfo(QualType T,
                                                     bool IsConst,
                                                     bool IsVolatile,
diff --git a/lib/AST/NSAPI.cpp b/lib/AST/NSAPI.cpp
index 2749100..a9b10ed 100644
--- a/lib/AST/NSAPI.cpp
+++ b/lib/AST/NSAPI.cpp
@@ -30,7 +30,8 @@ IdentifierInfo *NSAPI::getNSClassId(NSClassIdKindKind K) const {
     "NSNumber",
     "NSMutableSet",
     "NSCountedSet",
-    "NSMutableOrderedSet"
+    "NSMutableOrderedSet",
+    "NSValue"
   };
 
   if (!ClassIds[K])
diff --git a/lib/AST/ParentMap.cpp b/lib/AST/ParentMap.cpp
index a991302..d7d5f9c 100644
--- a/lib/AST/ParentMap.cpp
+++ b/lib/AST/ParentMap.cpp
@@ -36,8 +36,8 @@ static void BuildParentMap(MapTy& M, Stmt* S,
 
     // If we are rebuilding the map, clear out any existing state.
     if (M[POE->getSyntacticForm()])
-      for (Stmt::child_range I = S->children(); I; ++I)
-        M[*I] = nullptr;
+      for (Stmt *SubStmt : S->children())
+        M[SubStmt] = nullptr;
 
     M[POE->getSyntacticForm()] = S;
     BuildParentMap(M, POE->getSyntacticForm(), OV_Transparent);
@@ -82,10 +82,10 @@ static void BuildParentMap(MapTy& M, Stmt* S,
     break;
   }
   default:
-    for (Stmt::child_range I = S->children(); I; ++I) {
-      if (*I) {
-        M[*I] = S;
-        BuildParentMap(M, *I, OVMode);
+    for (Stmt *SubStmt : S->children()) {
+      if (SubStmt) {
+        M[SubStmt] = S;
+        BuildParentMap(M, SubStmt, OVMode);
       }
     }
     break;
diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp
index 6f4a89f..c0aab4c 100644
--- a/lib/AST/Stmt.cpp
+++ b/lib/AST/Stmt.cpp
@@ -1579,6 +1579,30 @@ OMPFlushClause *OMPFlushClause::CreateEmpty(const ASTContext &C, unsigned N) {
   return new (Mem) OMPFlushClause(N);
 }
 
+OMPDependClause *
+OMPDependClause::Create(const ASTContext &C, SourceLocation StartLoc,
+                        SourceLocation LParenLoc, SourceLocation EndLoc,
+                        OpenMPDependClauseKind DepKind, SourceLocation DepLoc,
+                        SourceLocation ColonLoc, ArrayRef<Expr *> VL) {
+  void *Mem = C.Allocate(llvm::RoundUpToAlignment(sizeof(OMPDependClause),
+                                                  llvm::alignOf<Expr *>()) +
+                         sizeof(Expr *) * VL.size());
+  OMPDependClause *Clause =
+      new (Mem) OMPDependClause(StartLoc, LParenLoc, EndLoc, VL.size());
+  Clause->setVarRefs(VL);
+  Clause->setDependencyKind(DepKind);
+  Clause->setDependencyLoc(DepLoc);
+  Clause->setColonLoc(ColonLoc);
+  return Clause;
+}
+
+OMPDependClause *OMPDependClause::CreateEmpty(const ASTContext &C, unsigned N) {
+  void *Mem = C.Allocate(llvm::RoundUpToAlignment(sizeof(OMPDependClause),
+                                                  llvm::alignOf<Expr *>()) +
+                         sizeof(Expr *) * N);
+  return new (Mem) OMPDependClause(N);
+}
+
 const OMPClause *
 OMPExecutableDirective::getSingleClause(OpenMPClauseKind K) const {
   auto &&I = getClausesOfKind(K);
@@ -2062,6 +2086,46 @@ OMPTaskgroupDirective *OMPTaskgroupDirective::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPTaskgroupDirective();
 }
 
+OMPCancellationPointDirective *OMPCancellationPointDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    OpenMPDirectiveKind CancelRegion) {
+  unsigned Size = llvm::RoundUpToAlignment(
+      sizeof(OMPCancellationPointDirective), llvm::alignOf<Stmt *>());
+  void *Mem = C.Allocate(Size);
+  OMPCancellationPointDirective *Dir =
+      new (Mem) OMPCancellationPointDirective(StartLoc, EndLoc);
+  Dir->setCancelRegion(CancelRegion);
+  return Dir;
+}
+
+OMPCancellationPointDirective *
+OMPCancellationPointDirective::CreateEmpty(const ASTContext &C, EmptyShell) {
+  unsigned Size = llvm::RoundUpToAlignment(
+      sizeof(OMPCancellationPointDirective), llvm::alignOf<Stmt *>());
+  void *Mem = C.Allocate(Size);
+  return new (Mem) OMPCancellationPointDirective();
+}
+
+OMPCancelDirective *
+OMPCancelDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+                           SourceLocation EndLoc,
+                           OpenMPDirectiveKind CancelRegion) {
+  unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPCancelDirective),
+                                           llvm::alignOf<Stmt *>());
+  void *Mem = C.Allocate(Size);
+  OMPCancelDirective *Dir = new (Mem) OMPCancelDirective(StartLoc, EndLoc);
+  Dir->setCancelRegion(CancelRegion);
+  return Dir;
+}
+
+OMPCancelDirective *OMPCancelDirective::CreateEmpty(const ASTContext &C,
+                                                    EmptyShell) {
+  unsigned Size = llvm::RoundUpToAlignment(sizeof(OMPCancelDirective),
+                                           llvm::alignOf<Stmt *>());
+  void *Mem = C.Allocate(Size);
+  return new (Mem) OMPCancelDirective();
+}
+
 OMPFlushDirective *OMPFlushDirective::Create(const ASTContext &C,
                                              SourceLocation StartLoc,
                                              SourceLocation EndLoc,
diff --git a/lib/AST/StmtIterator.cpp b/lib/AST/StmtIterator.cpp
index 1ccba04..732756f 100644
--- a/lib/AST/StmtIterator.cpp
+++ b/lib/AST/StmtIterator.cpp
@@ -93,12 +93,12 @@ bool StmtIteratorBase::HandleDecl(Decl* D) {
 }
 
 StmtIteratorBase::StmtIteratorBase(Decl** dgi, Decl** dge)
-  : stmt(nullptr), DGI(dgi), RawVAPtr(DeclGroupMode), DGE(dge) {
+  : DGI(dgi), RawVAPtr(DeclGroupMode), DGE(dge) {
   NextDecl(false);
 }
 
 StmtIteratorBase::StmtIteratorBase(const VariableArrayType* t)
-  : stmt(nullptr), DGI(nullptr), RawVAPtr(SizeOfTypeVAMode) {
+  : DGI(nullptr), RawVAPtr(SizeOfTypeVAMode) {
   RawVAPtr |= reinterpret_cast<uintptr_t>(t);
 }
 
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index 658e3df..7960077 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -799,6 +799,17 @@ void OMPClausePrinter::VisitOMPFlushClause(OMPFlushClause *Node) {
     OS << ")";
   }
 }
+
+void OMPClausePrinter::VisitOMPDependClause(OMPDependClause *Node) {
+  if (!Node->varlist_empty()) {
+    OS << "depend(";
+    OS << getOpenMPSimpleClauseTypeName(Node->getClauseKind(),
+                                        Node->getDependencyKind())
+       << " :";
+    VisitOMPClauseList(Node, ' ');
+    OS << ")";
+  }
+}
 }
 
 //===----------------------------------------------------------------------===//
@@ -940,6 +951,18 @@ void StmtPrinter::VisitOMPTeamsDirective(OMPTeamsDirective *Node) {
   PrintOMPExecutableDirective(Node);
 }
 
+void StmtPrinter::VisitOMPCancellationPointDirective(
+    OMPCancellationPointDirective *Node) {
+  Indent() << "#pragma omp cancellation point "
+           << getOpenMPDirectiveName(Node->getCancelRegion());
+  PrintOMPExecutableDirective(Node);
+}
+
+void StmtPrinter::VisitOMPCancelDirective(OMPCancelDirective *Node) {
+  Indent() << "#pragma omp cancel "
+           << getOpenMPDirectiveName(Node->getCancelRegion());
+  PrintOMPExecutableDirective(Node);
+}
 //===----------------------------------------------------------------------===//
 //  Expr printing methods.
 //===----------------------------------------------------------------------===//
@@ -1206,6 +1229,9 @@ void StmtPrinter::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node){
   case UETT_VecStep:
     OS << "vec_step";
     break;
+  case UETT_OpenMPRequiredSimdAlign:
+    OS << "__builtin_omp_required_simd_align";
+    break;
   }
   if (Node->isArgumentType()) {
     OS << '(';
diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp
index 23f8d0c..da99692 100644
--- a/lib/AST/StmtProfile.cpp
+++ b/lib/AST/StmtProfile.cpp
@@ -69,9 +69,9 @@ namespace {
 
 void StmtProfiler::VisitStmt(const Stmt *S) {
   ID.AddInteger(S->getStmtClass());
-  for (Stmt::const_child_range C = S->children(); C; ++C) {
-    if (*C)
-      Visit(*C);
+  for (const Stmt *SubStmt : S->children()) {
+    if (SubStmt)
+      Visit(SubStmt);
     else
       ID.AddInteger(0);
   }
@@ -425,6 +425,9 @@ OMPClauseProfiler::VisitOMPCopyprivateClause(const OMPCopyprivateClause *C) {
 void OMPClauseProfiler::VisitOMPFlushClause(const OMPFlushClause *C) {
   VisitOMPClauseList(C);
 }
+void OMPClauseProfiler::VisitOMPDependClause(const OMPDependClause *C) {
+  VisitOMPClauseList(C);
+}
 }
 
 void
@@ -534,6 +537,15 @@ void StmtProfiler::VisitOMPTeamsDirective(const OMPTeamsDirective *S) {
   VisitOMPExecutableDirective(S);
 }
 
+void StmtProfiler::VisitOMPCancellationPointDirective(
+    const OMPCancellationPointDirective *S) {
+  VisitOMPExecutableDirective(S);
+}
+
+void StmtProfiler::VisitOMPCancelDirective(const OMPCancelDirective *S) {
+  VisitOMPExecutableDirective(S);
+}
+
 void StmtProfiler::VisitExpr(const Expr *S) {
   VisitStmt(S);
 }
diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp
index 3ac1171..541bd1e 100644
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -364,6 +364,11 @@ bool Type::isStructureType() const {
     return RT->getDecl()->isStruct();
   return false;
 }
+bool Type::isObjCBoxableRecordType() const {
+  if (const RecordType *RT = getAs<RecordType>())
+    return RT->getDecl()->hasAttr<ObjCBoxableAttr>();
+  return false;
+}
 bool Type::isInterfaceType() const {
   if (const RecordType *RT = getAs<RecordType>())
     return RT->getDecl()->isInterface();
diff --git a/lib/AST/TypePrinter.cpp b/lib/AST/TypePrinter.cpp
index ebe09d8..9938170 100644
--- a/lib/AST/TypePrinter.cpp
+++ b/lib/AST/TypePrinter.cpp
@@ -1147,11 +1147,11 @@ void TypePrinter::printAttributedBefore(const AttributedType *T,
       T->getAttrKind() == AttributedType::attr_nullable ||
       T->getAttrKind() == AttributedType::attr_null_unspecified) {
     if (T->getAttrKind() == AttributedType::attr_nonnull)
-      OS << " __nonnull";
+      OS << " _Nonnull";
     else if (T->getAttrKind() == AttributedType::attr_nullable)
-      OS << " __nullable";
+      OS << " _Nullable";
     else if (T->getAttrKind() == AttributedType::attr_null_unspecified)
-      OS << " __null_unspecified";
+      OS << " _Null_unspecified";
     else
       llvm_unreachable("unhandled nullability");
     spaceBeforePlaceHolder(OS);
@@ -1186,11 +1186,11 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
       T->getAttrKind() == AttributedType::attr_nullable ||
       T->getAttrKind() == AttributedType::attr_null_unspecified) {
     if (T->getAttrKind() == AttributedType::attr_nonnull)
-      OS << " __nonnull";
+      OS << " _Nonnull";
     else if (T->getAttrKind() == AttributedType::attr_nullable)
-      OS << " __nullable";
+      OS << " _Nullable";
     else if (T->getAttrKind() == AttributedType::attr_null_unspecified)
-      OS << " __null_unspecified";
+      OS << " _Null_unspecified";
     else
       llvm_unreachable("unhandled nullability");
 
diff --git a/lib/ASTMatchers/ASTMatchersInternal.cpp b/lib/ASTMatchers/ASTMatchersInternal.cpp
index 2c482e3..b6ef822 100644
--- a/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -114,9 +114,9 @@ DynTypedMatcher DynTypedMatcher::constructVariadic(
   assert(InnerMatchers.size() > 0 && "Array must not be empty.");
   assert(std::all_of(InnerMatchers.begin(), InnerMatchers.end(),
                      [&InnerMatchers](const DynTypedMatcher &M) {
-           return InnerMatchers[0].SupportedKind.isSame(M.SupportedKind);
+           return InnerMatchers[0].canConvertTo(M.SupportedKind);
          }) &&
-         "SupportedKind must match!");
+         "SupportedKind must be convertible to a common type!");
 
   auto SupportedKind = InnerMatchers[0].SupportedKind;
   // We must relax the restrict kind here.
diff --git a/lib/ASTMatchers/Dynamic/Registry.cpp b/lib/ASTMatchers/Dynamic/Registry.cpp
index 59c204d..72713dd 100644
--- a/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -240,6 +240,7 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(innerType);
   REGISTER_MATCHER(integerLiteral);
   REGISTER_MATCHER(isArrow);
+  REGISTER_MATCHER(isCatchAll);
   REGISTER_MATCHER(isConst);
   REGISTER_MATCHER(isConstQualified);
   REGISTER_MATCHER(isDefinition);
diff --git a/lib/Analysis/AnalysisDeclContext.cpp b/lib/Analysis/AnalysisDeclContext.cpp
index 4e623c8..d7fb7e9 100644
--- a/lib/Analysis/AnalysisDeclContext.cpp
+++ b/lib/Analysis/AnalysisDeclContext.cpp
@@ -472,9 +472,9 @@ public:
   : BEVals(bevals), BC(bc) {}
 
   void VisitStmt(Stmt *S) {
-    for (Stmt::child_range I = S->children(); I; ++I)
-      if (Stmt *child = *I)
-        Visit(child);
+    for (Stmt *Child : S->children())
+      if (Child)
+        Visit(Child);
   }
 
   void VisitDeclRefExpr(DeclRefExpr *DR) {
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 19b3f5a..54d15bd 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -270,9 +270,8 @@ reverse_children::reverse_children(Stmt *S) {
   }
 
   // Default case for all other statements.
-  for (Stmt::child_range I = S->children(); I; ++I) {
-    childrenBuf.push_back(*I);
-  }
+  for (Stmt *SubStmt : S->children())
+    childrenBuf.push_back(SubStmt);
 
   // This needs to be done *after* childrenBuf has been populated.
   children = childrenBuf;
@@ -3641,11 +3640,11 @@ CFGBlock *CFGBuilder::VisitChildrenForTemporaryDtors(Stmt *E,
   // bottom-up, this means we visit them in their natural order, which
   // reverses them in the CFG.
   CFGBlock *B = Block;
-  for (Stmt::child_range I = E->children(); I; ++I) {
-    if (Stmt *Child = *I)
+  for (Stmt *Child : E->children())
+    if (Child)
       if (CFGBlock *R = VisitForTemporaryDtors(Child, false, Context))
         B = R;
-  }
+
   return B;
 }
 
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 91a8492..d0660346 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -83,9 +83,9 @@ public:
   }
 
   void VisitChildren(Stmt *S) {
-    for (Stmt::child_range I = S->children(); I; ++I)
-      if (*I)
-        static_cast<CGBuilder*>(this)->Visit(*I);
+    for (Stmt *SubStmt : S->children())
+      if (SubStmt)
+        this->Visit(SubStmt);
   }
 };
 
diff --git a/lib/Analysis/LiveVariables.cpp b/lib/Analysis/LiveVariables.cpp
index 0ab1580..5e0a9a0 100644
--- a/lib/Analysis/LiveVariables.cpp
+++ b/lib/Analysis/LiveVariables.cpp
@@ -322,11 +322,10 @@ void TransferFunctions::Visit(Stmt *S) {
       return;
     }
   }
-  
-  for (Stmt::child_iterator it = S->child_begin(), ei = S->child_end();
-       it != ei; ++it) {
-    if (Stmt *child = *it)
-      AddLiveStmt(val.liveStmts, LV.SSetFact, child);
+
+  for (Stmt *Child : S->children()) {
+    if (Child)
+      AddLiveStmt(val.liveStmts, LV.SSetFact, Child);
   }
 }
 
diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp
index b8d3ec1..f0976bc 100644
--- a/lib/Analysis/PrintfFormatString.cpp
+++ b/lib/Analysis/PrintfFormatString.cpp
@@ -49,6 +49,24 @@ static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
   return false;
 }
 
+static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS,
+                           const char *FlagBeg, const char *E, bool Warn) {
+   StringRef Flag(FlagBeg, E - FlagBeg);
+   // Currently there is only one flag.
+   if (Flag == "tt") {
+     FS.setHasObjCTechnicalTerm(FlagBeg);
+     return false;
+   }
+   // Handle either the case of no flag or an invalid flag.
+   if (Warn) {
+     if (Flag == "")
+       H.HandleEmptyObjCModifierFlag(FlagBeg, E  - FlagBeg);
+     else
+       H.HandleInvalidObjCModifierFlag(FlagBeg, E  - FlagBeg);
+   }
+   return true;
+}
+
 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
                                                   const char *&Beg,
                                                   const char *E,
@@ -168,6 +186,38 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
     return true;
   }
 
+  // Look for the Objective-C modifier flags, if any.
+  // We parse these here, even if they don't apply to
+  // the conversion specifier, and then emit an error
+  // later if the conversion specifier isn't '@'.  This
+  // enables better recovery, and we don't know if
+  // these flags are applicable until later.
+  const char *ObjCModifierFlagsStart = nullptr,
+             *ObjCModifierFlagsEnd = nullptr;
+  if (*I == '[') {
+    ObjCModifierFlagsStart = I;
+    ++I;
+    auto flagStart = I;
+    for (;; ++I) {
+      ObjCModifierFlagsEnd = I;
+      if (I == E) {
+        if (Warn)
+          H.HandleIncompleteSpecifier(Start, E - Start);
+        return true;
+      }
+      // Did we find the closing ']'?
+      if (*I == ']') {
+        if (ParseObjCFlags(H, FS, flagStart, I, Warn))
+          return true;
+        ++I;
+        break;
+      }
+      // There are no separators defined yet for multiple
+      // Objective-C modifier flags.  When those are
+      // defined, this is the place to check.
+    }
+  }
+
   if (*I == '\0') {
     // Detect spurious null characters, which are likely errors.
     H.HandleNullChar(I);
@@ -240,6 +290,18 @@ static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
       if (Target.getTriple().isOSMSVCRT())
         k = ConversionSpecifier::ZArg;
   }
+  
+  // Check to see if we used the Objective-C modifier flags with
+  // a conversion specifier other than '@'.
+  if (k != ConversionSpecifier::ObjCObjArg &&
+      k != ConversionSpecifier::InvalidSpecifier &&
+      ObjCModifierFlagsStart) {
+    H.HandleObjCFlagsWithNonObjCConversion(ObjCModifierFlagsStart,
+                                           ObjCModifierFlagsEnd + 1,
+                                           conversionPosition);
+    return true;
+  }
+  
   PrintfConversionSpecifier CS(conversionPosition, k);
   FS.setConversionSpecifier(CS);
   if (CS.consumesDataArgument() && !FS.usesPositionalArg())
diff --git a/lib/Analysis/PseudoConstantAnalysis.cpp b/lib/Analysis/PseudoConstantAnalysis.cpp
index 3f96ca8..5b917a7 100644
--- a/lib/Analysis/PseudoConstantAnalysis.cpp
+++ b/lib/Analysis/PseudoConstantAnalysis.cpp
@@ -220,8 +220,8 @@ void PseudoConstantAnalysis::RunAnalysis() {
     } // switch (head->getStmtClass())
 
     // Add all substatements to the worklist
-    for (Stmt::const_child_range I = Head->children(); I; ++I)
-      if (*I)
-        WorkList.push_back(*I);
+    for (const Stmt *SubStmt : Head->children())
+      if (SubStmt)
+        WorkList.push_back(SubStmt);
   } // while (!WorkList.empty())
 }
diff --git a/lib/Basic/Diagnostic.cpp b/lib/Basic/Diagnostic.cpp
index 7f5a15d..f89caf7 100644
--- a/lib/Basic/Diagnostic.cpp
+++ b/lib/Basic/Diagnostic.cpp
@@ -24,6 +24,27 @@
 
 using namespace clang;
 
+const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB,
+                                           DiagNullabilityKind nullability) {
+  StringRef string;
+  switch (nullability.first) {
+  case NullabilityKind::NonNull:
+    string = nullability.second ? "'nonnull'" : "'_Nonnull'";
+    break;
+
+  case NullabilityKind::Nullable:
+    string = nullability.second ? "'nullable'" : "'_Nullable'";
+    break;
+
+  case NullabilityKind::Unspecified:
+    string = nullability.second ? "'null_unspecified'" : "'_Null_unspecified'";
+    break;
+  }
+
+  DB.AddString(string);
+  return DB;
+}
+
 static void DummyArgToStringFn(DiagnosticsEngine::ArgumentKind AK, intptr_t QT,
                             StringRef Modifier, StringRef Argument,
                             ArrayRef<DiagnosticsEngine::ArgumentValue> PrevArgs,
diff --git a/lib/Basic/IdentifierTable.cpp b/lib/Basic/IdentifierTable.cpp
index b295008..36fba99 100644
--- a/lib/Basic/IdentifierTable.cpp
+++ b/lib/Basic/IdentifierTable.cpp
@@ -71,8 +71,6 @@ IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
   return new EmptyLookupIterator();
 }
 
-ExternalIdentifierLookup::~ExternalIdentifierLookup() {}
-
 IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                  IdentifierInfoLookup* externalLookup)
   : HashTable(8192), // Start with space for 8K identifiers.
@@ -647,16 +645,17 @@ const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
   llvm_unreachable("Invalid OverloadedOperatorKind!");
 }
 
-StringRef clang::getNullabilitySpelling(NullabilityKind kind) {
+StringRef clang::getNullabilitySpelling(NullabilityKind kind,
+                                        bool isContextSensitive) {
   switch (kind) {
   case NullabilityKind::NonNull:
-    return "__nonnull";
+    return isContextSensitive ? "nonnull" : "_Nonnull";
 
   case NullabilityKind::Nullable:
-    return "__nullable";
+    return isContextSensitive ? "nullable" : "_Nullable";
 
   case NullabilityKind::Unspecified:
-    return "__null_unspecified";
+    return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
   }
   llvm_unreachable("Unknown nullability kind.");
 }
diff --git a/lib/Basic/Module.cpp b/lib/Basic/Module.cpp
index 7308665..1a48a6c 100644
--- a/lib/Basic/Module.cpp
+++ b/lib/Basic/Module.cpp
@@ -27,7 +27,7 @@ using namespace clang;
 Module::Module(StringRef Name, SourceLocation DefinitionLoc, Module *Parent,
                bool IsFramework, bool IsExplicit, unsigned VisibilityID)
     : Name(Name), DefinitionLoc(DefinitionLoc), Parent(Parent), Directory(),
-      Umbrella(), ASTFile(nullptr), VisibilityID(VisibilityID),
+      Umbrella(), Signature(0), ASTFile(nullptr), VisibilityID(VisibilityID),
       IsMissingRequirement(false), IsAvailable(true), IsFromModuleFile(false),
       IsFramework(IsFramework), IsExplicit(IsExplicit), IsSystem(false),
       IsExternC(false), IsInferred(false), InferSubmodules(false),
diff --git a/lib/Basic/OpenMPKinds.cpp b/lib/Basic/OpenMPKinds.cpp
index b2798b7..b7407f6 100644
--- a/lib/Basic/OpenMPKinds.cpp
+++ b/lib/Basic/OpenMPKinds.cpp
@@ -91,6 +91,11 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind,
 #define OPENMP_SCHEDULE_KIND(Name) .Case(#Name, OMPC_SCHEDULE_##Name)
 #include "clang/Basic/OpenMPKinds.def"
         .Default(OMPC_SCHEDULE_unknown);
+  case OMPC_depend:
+    return llvm::StringSwitch<OpenMPDependClauseKind>(Str)
+#define OPENMP_DEPEND_KIND(Name) .Case(#Name, OMPC_DEPEND_##Name)
+#include "clang/Basic/OpenMPKinds.def"
+        .Default(OMPC_DEPEND_unknown);
   case OMPC_unknown:
   case OMPC_threadprivate:
   case OMPC_if:
@@ -154,6 +159,15 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
     return #Name;
 #include "clang/Basic/OpenMPKinds.def"
     }
+  case OMPC_depend:
+    switch (Type) {
+    case OMPC_DEPEND_unknown:
+      return "unknown";
+#define OPENMP_DEPEND_KIND(Name)                                             \
+  case OMPC_DEPEND_##Name:                                                   \
+    return #Name;
+#include "clang/Basic/OpenMPKinds.def"
+    }
     llvm_unreachable("Invalid OpenMP 'schedule' clause type");
   case OMPC_unknown:
   case OMPC_threadprivate:
@@ -333,6 +347,8 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
   case OMPD_barrier:
   case OMPD_taskwait:
   case OMPD_taskgroup:
+  case OMPD_cancellation_point:
+  case OMPD_cancel:
   case OMPD_ordered:
     break;
   }
diff --git a/lib/Basic/TargetInfo.cpp b/lib/Basic/TargetInfo.cpp
index 330258b..856ad50 100644
--- a/lib/Basic/TargetInfo.cpp
+++ b/lib/Basic/TargetInfo.cpp
@@ -50,6 +50,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
   LargeArrayAlign = 0;
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
   MaxVectorAlign = 0;
+  SimdDefaultAlign = 0;
   SizeType = UnsignedLong;
   PtrDiffType = SignedLong;
   IntMaxType = SignedLongLong;
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 076b04b..8f2bca0 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -759,6 +759,7 @@ public:
       HasP8Crypto(false), HasDirectMove(false), HasQPX(false), HasHTM(false),
       HasBPERMD(false), HasExtDiv(false) {
     BigEndian = (Triple.getArch() != llvm::Triple::ppc64le);
+    SimdDefaultAlign = 128;
     LongDoubleWidth = LongDoubleAlign = 128;
     LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble;
   }
@@ -2215,6 +2216,7 @@ public:
     Names = AddlRegNames;
     NumNames = llvm::array_lengthof(AddlRegNames);
   }
+  bool validateCpuSupports(StringRef Name) const override;
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &info) const override;
 
@@ -2249,7 +2251,9 @@ public:
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override;
   StringRef getABI() const override {
-    if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
+    if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX512F)
+      return "avx512";
+    else if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
       return "avx";
     else if (getTriple().getArch() == llvm::Triple::x86 &&
              MMX3DNowLevel == NoMMX3DNow)
@@ -2725,7 +2729,11 @@ void X86TargetInfo::setXOPLevel(llvm::StringMap<bool> &Features, XOPEnum Level,
 
 void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
                                           StringRef Name, bool Enabled) {
-  Features[Name] = Enabled;
+  // This is a bit of a hack to deal with the sse4 target feature when used
+  // as part of the target attribute. We handle sse4 correctly everywhere
+  // else. See below for more information on how we handle the sse4 options.
+  if (Name != "sse4")
+    Features[Name] = Enabled;
 
   if (Name == "mmx") {
     setMMXLevel(Features, MMX, Enabled);
@@ -2776,6 +2784,15 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
   } else if (Name == "sha") {
     if (Enabled)
       setSSELevel(Features, SSE2, Enabled);
+  } else if (Name == "sse4") {
+    // We can get here via the __target__ attribute since that's not controlled
+    // via the -msse4/-mno-sse4 command line alias. Handle this the same way
+    // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if
+    // disabled.
+    if (Enabled)
+      setSSELevel(Features, SSE42, Enabled);
+    else
+      setSSELevel(Features, SSE41, Enabled);
   }
 }
 
@@ -2972,6 +2989,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
     Features.erase(it);
   else if (SSELevel > NoSSE)
     MMX3DNowLevel = std::max(MMX3DNowLevel, MMX);
+
+  SimdDefaultAlign =
+      (getABI() == "avx512") ? 512 : (getABI() == "avx") ? 256 : 128;
   return true;
 }
 
@@ -3336,6 +3356,33 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Default(false);
 }
 
+// We can't use a generic validation scheme for the features accepted here
+// versus subtarget features accepted in the target attribute because the
+// bitfield structure that's initialized in the runtime only supports the
+// below currently rather than the full range of subtarget features. (See
+// X86TargetInfo::hasFeature for a somewhat comprehensive list).
+bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
+  return llvm::StringSwitch<bool>(FeatureStr)
+      .Case("cmov", true)
+      .Case("mmx", true)
+      .Case("popcnt", true)
+      .Case("sse", true)
+      .Case("sse2", true)
+      .Case("sse3", true)
+      .Case("sse4.1", true)
+      .Case("sse4.2", true)
+      .Case("avx", true)
+      .Case("avx2", true)
+      .Case("sse4a", true)
+      .Case("fma4", true)
+      .Case("xop", true)
+      .Case("fma", true)
+      .Case("avx512f", true)
+      .Case("bmi", true)
+      .Case("bmi2", true)
+      .Default(false);
+}
+
 bool
 X86TargetInfo::validateAsmConstraint(const char *&Name,
                                      TargetInfo::ConstraintInfo &Info) const {
@@ -3949,8 +3996,14 @@ class ARMTargetInfo : public TargetInfo {
     FP_Neon
   } FPMath;
 
+  unsigned ArchISA;
+  unsigned ArchKind;
+  unsigned ArchProfile;
+  unsigned ArchVersion;
+
   unsigned FPU : 5;
 
+  unsigned ShouldUseInlineAtomic : 1;
   unsigned IsAAPCS : 1;
   unsigned IsThumb : 1;
   unsigned HWDiv : 2;
@@ -3972,37 +4025,6 @@ class ARMTargetInfo : public TargetInfo {
 
   static const Builtin::Info BuiltinInfo[];
 
-  static bool shouldUseInlineAtomic(const llvm::Triple &T) {
-    StringRef ArchName = T.getArchName();
-    if (T.getArch() == llvm::Triple::arm ||
-        T.getArch() == llvm::Triple::armeb) {
-      StringRef VersionStr;
-      if (ArchName.startswith("armv"))
-        VersionStr = ArchName.substr(4, 1);
-      else if (ArchName.startswith("armebv"))
-        VersionStr = ArchName.substr(6, 1);
-      else
-        return false;
-      unsigned Version;
-      if (VersionStr.getAsInteger(10, Version))
-        return false;
-      return Version >= 6;
-    }
-    assert(T.getArch() == llvm::Triple::thumb ||
-           T.getArch() == llvm::Triple::thumbeb);
-    StringRef VersionStr;
-    if (ArchName.startswith("thumbv"))
-      VersionStr = ArchName.substr(6, 1);
-    else if (ArchName.startswith("thumbebv"))
-      VersionStr = ArchName.substr(8, 1);
-    else
-      return false;
-    unsigned Version;
-    if (VersionStr.getAsInteger(10, Version))
-      return false;
-    return Version >= 7;
-  }
-
   void setABIAAPCS() {
     IsAAPCS = true;
 
@@ -4101,6 +4123,27 @@ class ARMTargetInfo : public TargetInfo {
     // FIXME: Override "preferred align" for double and long long.
   }
 
+  void setArchInfo(StringRef ArchName) {
+    ArchISA     = llvm::ARMTargetParser::parseArchISA(ArchName);
+    ArchKind    = llvm::ARMTargetParser::parseArch(ArchName);
+    ArchProfile = llvm::ARMTargetParser::parseArchProfile(ArchName);
+    ArchVersion = llvm::ARMTargetParser::parseArchVersion(ArchName); 
+  }
+
+  void setAtomic() {
+    // Cortex M does not support 8 byte atomics, while general Thumb2 does. 
+    if (ArchProfile == llvm::ARM::PK_M) {
+      MaxAtomicPromoteWidth = 32;
+      if (ShouldUseInlineAtomic)
+        MaxAtomicInlineWidth = 32;
+    }
+    else {
+      MaxAtomicPromoteWidth = 64;
+      if (ShouldUseInlineAtomic)
+        MaxAtomicInlineWidth = 64;
+    } 
+  }
+
 public:
   ARMTargetInfo(const llvm::Triple &Triple, bool IsBigEndian)
       : TargetInfo(Triple), CPU("arm1136j-s"), FPMath(FP_Default),
@@ -4116,12 +4159,27 @@ public:
       break;
     }
 
+    // if subArch is not specified Arc Info is based on the default CPU
+    if (Triple.getSubArch() == llvm::Triple::SubArchType::NoSubArch) {
+      unsigned ArchKind = llvm::ARMTargetParser::parseCPUArch(CPU);
+      setArchInfo(llvm::ARMTargetParser::getArchName(ArchKind));
+      ShouldUseInlineAtomic = false;
+    }
+    else {
+      setArchInfo(Triple.getArchName());
+      // FIXME: Should't this be updated also when calling setCPU() ? 
+      // Doing so currently causes regressions
+      ShouldUseInlineAtomic = (ArchISA == llvm::ARM::IK_ARM   && ArchVersion >= 6) ||
+                              (ArchISA == llvm::ARM::IK_THUMB && ArchVersion >= 7);
+    }
+
     // {} in inline assembly are neon specifiers, not assembly variant
     // specifiers.
     NoAsmVariants = true;
 
-    // FIXME: Should we just treat this as a feature?
-    IsThumb = getTriple().getArchName().startswith("thumb");
+    // FIXME: Should't this be updated also when calling setCPU() ? 
+    // Doing so currently causes regressions
+    IsThumb = (ArchISA == llvm::ARM::IK_THUMB);
 
     // FIXME: This duplicates code from the driver that sets the -target-abi
     // option - this code is used if -target-abi isn't passed and should
@@ -4166,10 +4224,7 @@ public:
     // ARM targets default to using the ARM C++ ABI.
     TheCXXABI.set(TargetCXXABI::GenericARM);
 
-    // ARM has atomics up to 8 bytes
-    MaxAtomicPromoteWidth = 64;
-    if (shouldUseInlineAtomic(getTriple()))
-      MaxAtomicInlineWidth = 64;
+    setAtomic();
 
     // Do force alignment of members that follow zero length bitfields.  If
     // the alignment of the zero-length bitfield is greater than the member
@@ -4177,7 +4232,9 @@ public:
     // zero length bitfield.
     UseZeroLengthBitfieldAlignment = true;
   }
+
   StringRef getABI() const override { return ABI; }
+
   bool setABI(const std::string &Name) override {
     ABI = Name;
 
@@ -4198,11 +4255,6 @@ public:
 
   // FIXME: This should be based on Arch attributes, not CPU names.
   void getDefaultFeatures(llvm::StringMap<bool> &Features) const override {
-    StringRef ArchName = getTriple().getArchName();
-    unsigned ArchKind = llvm::ARMTargetParser::parseArch(ArchName);
-    bool IsV8 = (ArchKind == llvm::ARM::AK_ARMV8A ||
-                 ArchKind == llvm::ARM::AK_ARMV8_1A);
-
     if (CPU == "arm1136jf-s" || CPU == "arm1176jzf-s" || CPU == "mpcore")
       Features["vfp2"] = true;
     else if (CPU == "cortex-a8" || CPU == "cortex-a9") {
@@ -4227,7 +4279,7 @@ public:
       Features["hwdiv-arm"] = true;
       Features["crc"] = true;
       Features["crypto"] = true;
-    } else if (CPU == "cortex-r5" || CPU == "cortex-r7" || IsV8) {
+    } else if (CPU == "cortex-r5" || CPU == "cortex-r7" || ArchVersion == 8) {
       Features["hwdiv"] = true;
       Features["hwdiv-arm"] = true;
     } else if (CPU == "cortex-m3" || CPU == "cortex-m4" || CPU == "cortex-m7" ||
@@ -4244,6 +4296,9 @@ public:
     SoftFloat = SoftFloatABI = false;
     HWDiv = 0;
 
+    // This does not diagnose illegal cases like having both
+    // "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp".
+    uint32_t HW_FP_remove = 0;
     for (const auto &Feature : Features) {
       if (Feature == "+soft-float") {
         SoftFloat = true;
@@ -4251,19 +4306,19 @@ public:
         SoftFloatABI = true;
       } else if (Feature == "+vfp2") {
         FPU |= VFP2FPU;
-        HW_FP = HW_FP_SP | HW_FP_DP;
+        HW_FP |= HW_FP_SP | HW_FP_DP;
       } else if (Feature == "+vfp3") {
         FPU |= VFP3FPU;
-        HW_FP = HW_FP_SP | HW_FP_DP;
+        HW_FP |= HW_FP_SP | HW_FP_DP;
       } else if (Feature == "+vfp4") {
         FPU |= VFP4FPU;
-        HW_FP = HW_FP_SP | HW_FP_DP | HW_FP_HP;
+        HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
       } else if (Feature == "+fp-armv8") {
         FPU |= FPARMV8;
-        HW_FP = HW_FP_SP | HW_FP_DP | HW_FP_HP;
+        HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
       } else if (Feature == "+neon") {
         FPU |= NeonFPU;
-        HW_FP = HW_FP_SP | HW_FP_DP;
+        HW_FP |= HW_FP_SP | HW_FP_DP;
       } else if (Feature == "+hwdiv") {
         HWDiv |= HWDivThumb;
       } else if (Feature == "+hwdiv-arm") {
@@ -4273,9 +4328,10 @@ public:
       } else if (Feature == "+crypto") {
         Crypto = 1;
       } else if (Feature == "+fp-only-sp") {
-        HW_FP &= ~HW_FP_DP;
+        HW_FP_remove |= HW_FP_DP | HW_FP_HP;
       }
     }
+    HW_FP &= ~HW_FP_remove;
 
     if (!(FPU & NeonFPU) && FPMath == FP_Neon) {
       Diags.Report(diag::err_target_unsupported_fpmath) << "neon";
@@ -4306,26 +4362,14 @@ public:
         .Case("hwdiv-arm", HWDiv & HWDivARM)
         .Default(false);
   }
-  const char *getCPUDefineSuffix(StringRef Name) const {
-    if(Name == "generic") {
-      auto subarch = getTriple().getSubArch();
-      switch (subarch) {
-        case llvm::Triple::SubArchType::ARMSubArch_v8_1a: 
-          return "8_1A";
-        default:
-          break;
-      }
-    }
-
-    unsigned ArchKind = llvm::ARMTargetParser::parseCPUArch(Name);
-    if (ArchKind == llvm::ARM::AK_INVALID)
-      return "";
-
+  const char *getCPUAttr() const {
+    const char *CPUAttr;
     // For most sub-arches, the build attribute CPU name is enough.
     // For Cortex variants, it's slightly different.
     switch(ArchKind) {
     default:
-      return llvm::ARMTargetParser::getCPUAttr(ArchKind);
+      CPUAttr = llvm::ARMTargetParser::getCPUAttr(ArchKind);
+      return CPUAttr ? CPUAttr : "" ;
     case llvm::ARM::AK_ARMV6M:
     case llvm::ARM::AK_ARMV6SM:
       return "6M";
@@ -4345,23 +4389,8 @@ public:
       return "8_1A";
     }
   }
-  const char *getCPUProfile(StringRef Name) const {
-    if(Name == "generic") {
-      auto subarch = getTriple().getSubArch();
-      switch (subarch) {
-        case llvm::Triple::SubArchType::ARMSubArch_v8_1a: 
-          return "A";
-        default:
-          break;
-      }
-    }
-
-    unsigned CPUArch = llvm::ARMTargetParser::parseCPUArch(Name);
-    if (CPUArch == llvm::ARM::AK_INVALID)
-      return "";
-
-    StringRef ArchName = llvm::ARMTargetParser::getArchName(CPUArch);
-    switch(llvm::ARMTargetParser::parseArchProfile(ArchName)) {
+  const char *getCPUProfile() const {
+    switch(ArchProfile) {
       case llvm::ARM::PK_A:
         return "A";
       case llvm::ARM::PK_R:
@@ -4373,35 +4402,26 @@ public:
     }
   }
   bool setCPU(const std::string &Name) override {
-    if (!getCPUDefineSuffix(Name))
+    unsigned ArchKind = llvm::ARMTargetParser::parseCPUArch(Name);
+    if (ArchKind == llvm::ARM::AK_INVALID)
       return false;
-
-    // Cortex M does not support 8 byte atomics, while general Thumb2 does.
-    StringRef Profile = getCPUProfile(Name);
-    if (Profile == "M" && MaxAtomicInlineWidth) {
-      MaxAtomicPromoteWidth = 32;
-      MaxAtomicInlineWidth = 32;
-    }
-
+    setArchInfo(llvm::ARMTargetParser::getArchName(ArchKind));
+    setAtomic();
     CPU = Name;
     return true;
   }
   bool setFPMath(StringRef Name) override;
-  bool supportsThumb(StringRef ArchName, StringRef CPUArch,
-                     unsigned CPUArchVer) const {
-    return CPUArchVer >= 7 || (CPUArch.find('T') != StringRef::npos) ||
-           (CPUArch.find('M') != StringRef::npos);
-  }
-  bool supportsThumb2(StringRef ArchName, StringRef CPUArch,
-                      unsigned CPUArchVer) const {
-    // We check both CPUArchVer and ArchName because when only triple is
-    // specified, the default CPU is arm1136j-s.
-    return ArchName.endswith("v6t2") || ArchName.endswith("v7") ||
-           ArchName.endswith("v8.1a") ||
-           ArchName.endswith("v8") || CPUArch == "6T2" || CPUArchVer >= 7;
+  bool supportsThumb(StringRef CPUAttr) const {
+    return CPUAttr.count('T') || ArchVersion >= 6;
+  }
+  bool supportsThumb2(StringRef CPUAttr) const {
+    return CPUAttr.equals("6T2") || ArchVersion >= 7;
   }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
+    StringRef CPUAttr    = getCPUAttr();
+    StringRef CPUProfile = getCPUProfile();
+
     // Target identification.
     Builder.defineMacro("__arm");
     Builder.defineMacro("__arm__");
@@ -4409,19 +4429,12 @@ public:
     // Target properties.
     Builder.defineMacro("__REGISTER_PREFIX__", "");
 
-    StringRef CPUArch = getCPUDefineSuffix(CPU);
-    unsigned int CPUArchVer;
-    if (CPUArch.substr(0, 1).getAsInteger<unsigned int>(10, CPUArchVer))
-      llvm_unreachable("Invalid char for architecture version number");
-    Builder.defineMacro("__ARM_ARCH_" + CPUArch + "__");
+    Builder.defineMacro("__ARM_ARCH_" + CPUAttr + "__");
 
     // ACLE 6.4.1 ARM/Thumb instruction set architecture
-    StringRef CPUProfile = getCPUProfile(CPU);
-    StringRef ArchName = getTriple().getArchName();
-
     // __ARM_ARCH is defined as an integer value indicating the current ARM ISA
-    Builder.defineMacro("__ARM_ARCH", CPUArch.substr(0, 1));
-    if (CPUArch[0] >= '8') {
+    Builder.defineMacro("__ARM_ARCH", std::to_string(ArchVersion));
+    if (ArchVersion >= 8) {
       Builder.defineMacro("__ARM_FEATURE_NUMERIC_MAXMIN");
       Builder.defineMacro("__ARM_FEATURE_DIRECTED_ROUNDING");
     }
@@ -4435,9 +4448,9 @@ public:
     // __ARM_ARCH_ISA_THUMB is defined to 1 if the core supporst the original
     // Thumb ISA (including v6-M).  It is set to 2 if the core supports the
     // Thumb-2 ISA as found in the v6T2 architecture and all v7 architecture.
-    if (supportsThumb2(ArchName, CPUArch, CPUArchVer))
+    if (supportsThumb2(CPUAttr))
       Builder.defineMacro("__ARM_ARCH_ISA_THUMB", "2");
-    else if (supportsThumb(ArchName, CPUArch, CPUArchVer))
+    else if (supportsThumb(CPUAttr))
       Builder.defineMacro("__ARM_ARCH_ISA_THUMB", "1");
 
     // __ARM_32BIT_STATE is defined to 1 if code is being generated for a 32-bit
@@ -4462,7 +4475,7 @@ public:
     // FIXME: It's more complicated than this and we don't really support
     // interworking.
     // Windows on ARM does not "support" interworking
-    if (5 <= CPUArchVer && CPUArchVer <= 8 && !getTriple().isOSWindows())
+    if (5 <= ArchVersion && ArchVersion <= 8 && !getTriple().isOSWindows())
       Builder.defineMacro("__THUMB_INTERWORK__");
 
     if (ABI == "aapcs" || ABI == "aapcs-linux" || ABI == "aapcs-vfp") {
@@ -4485,7 +4498,7 @@ public:
     if (IsThumb) {
       Builder.defineMacro("__THUMBEL__");
       Builder.defineMacro("__thumb__");
-      if (supportsThumb2(ArchName, CPUArch, CPUArchVer))
+      if (supportsThumb2(CPUAttr))
         Builder.defineMacro("__thumb2__");
     }
     if (((HWDiv & HWDivThumb) && IsThumb) || ((HWDiv & HWDivARM) && !IsThumb))
@@ -4508,7 +4521,7 @@ public:
     // the VFP define, hence the soft float and arch check. This is subtly
     // different from gcc, we follow the intent which was that it should be set
     // when Neon instructions are actually available.
-    if ((FPU & NeonFPU) && !SoftFloat && CPUArchVer >= 7) {
+    if ((FPU & NeonFPU) && !SoftFloat && ArchVersion >= 7) {
       Builder.defineMacro("__ARM_NEON");
       Builder.defineMacro("__ARM_NEON__");
     }
@@ -4525,18 +4538,18 @@ public:
     if (Crypto)
       Builder.defineMacro("__ARM_FEATURE_CRYPTO");
 
-    if (CPUArchVer >= 6 && CPUArch != "6M") {
+    if (ArchVersion >= 6 && CPUAttr != "6M") {
       Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
       Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
       Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
       Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
     }
 
-    bool is5EOrAbove = (CPUArchVer >= 6 ||
-                        (CPUArchVer == 5 &&
-                         CPUArch.find('E') != StringRef::npos));
-    bool is32Bit = (!IsThumb || supportsThumb2(ArchName, CPUArch, CPUArchVer));
-    if (is5EOrAbove && is32Bit && (CPUProfile != "M" || CPUArch  == "7EM"))
+    bool is5EOrAbove = (ArchVersion >= 6 ||
+                       (ArchVersion == 5 && CPUAttr.count('E')));
+    // FIXME: We are not getting all 32-bit ARM architectures
+    bool is32Bit = (!IsThumb || supportsThumb2(CPUAttr));
+    if (is5EOrAbove && is32Bit && (CPUProfile != "M" || CPUAttr  == "7EM"))
       Builder.defineMacro("__ARM_FEATURE_DSP");
   }
   void getTargetBuiltins(const Builtin::Info *&Records,
diff --git a/lib/Basic/VirtualFileSystem.cpp b/lib/Basic/VirtualFileSystem.cpp
index 8a882e1..a36102c 100644
--- a/lib/Basic/VirtualFileSystem.cpp
+++ b/lib/Basic/VirtualFileSystem.cpp
@@ -324,20 +324,6 @@ directory_iterator OverlayFileSystem::dir_begin(const Twine &Dir,
 // VFSFromYAML implementation
 //===-----------------------------------------------------------------------===/
 
-// Allow DenseMap<StringRef, ...>.  This is useful below because we know all the
-// strings are literals and will outlive the map, and there is no reason to
-// store them.
-namespace llvm {
-  template<>
-  struct DenseMapInfo<StringRef> {
-    // This assumes that "" will never be a valid key.
-    static inline StringRef getEmptyKey() { return StringRef(""); }
-    static inline StringRef getTombstoneKey() { return StringRef(); }
-    static unsigned getHashValue(StringRef Val) { return HashString(Val); }
-    static bool isEqual(StringRef LHS, StringRef RHS) { return LHS == RHS; }
-  };
-}
-
 namespace {
 
 enum EntryKind {
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index f5edea7..0bccad0 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -536,7 +536,6 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
   Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
   Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
   Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
-  Options.TrapFuncName = CodeGenOpts.TrapFuncName;
   Options.PositionIndependentExecutable = LangOpts.PIELevel != 0;
   Options.FunctionSections = CodeGenOpts.FunctionSections;
   Options.DataSections = CodeGenOpts.DataSections;
@@ -604,7 +603,10 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
   if (!TM)
     TM.reset(CreateTargetMachine(UsesCodeGen));
 
-  if (UsesCodeGen && !TM) return;
+  if (UsesCodeGen && !TM)
+    return;
+  if (TM)
+    TheModule->setDataLayout(*TM->getDataLayout());
   CreatePasses();
 
   switch (Action) {
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index a14daac..2ec909b 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -82,9 +82,9 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
 
 /// Utility to insert an atomic instruction based on Instrinsic::ID
 /// and the expression node.
-static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
-                               llvm::AtomicRMWInst::BinOp Kind,
-                               const CallExpr *E) {
+static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
+                                    llvm::AtomicRMWInst::BinOp Kind,
+                                    const CallExpr *E) {
   QualType T = E->getType();
   assert(E->getArg(0)->getType()->isPointerType());
   assert(CGF.getContext().hasSameUnqualifiedType(T,
@@ -108,8 +108,13 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
   llvm::Value *Result =
       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                   llvm::SequentiallyConsistent);
-  Result = EmitFromInt(CGF, Result, T, ValueType);
-  return RValue::get(Result);
+  return EmitFromInt(CGF, Result, T, ValueType);
+}
+
+static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
+                               llvm::AtomicRMWInst::BinOp Kind,
+                               const CallExpr *E) {
+  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
 }
 
 /// Utility to insert an atomic instruction based Instrinsic::ID and
@@ -151,6 +156,47 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
   return RValue::get(Result);
 }
 
+/// @brief Utility to insert an atomic cmpxchg instruction.
+///
+/// @param CGF The current codegen function.
+/// @param E   Builtin call expression to convert to cmpxchg.
+///            arg0 - address to operate on
+///            arg1 - value to compare with
+///            arg2 - new value
+/// @param ReturnBool Specifies whether to return success flag of
+///                   cmpxchg result or the old value.
+///
+/// @returns result of cmpxchg, according to ReturnBool
+static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
+                                     bool ReturnBool) {
+  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
+  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
+  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
+
+  llvm::IntegerType *IntType = llvm::IntegerType::get(
+      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
+  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
+
+  Value *Args[3];
+  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
+  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
+  llvm::Type *ValueType = Args[1]->getType();
+  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
+  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
+
+  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
+                                                llvm::SequentiallyConsistent,
+                                                llvm::SequentiallyConsistent);
+  if (ReturnBool)
+    // Extract boolean success flag and zext it to int.
+    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
+                                  CGF.ConvertType(E->getType()));
+  else
+    // Extract old value and emit it using the same type as compare value.
+    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
+                       ValueType);
+}
+
 /// EmitFAbs - Emit a call to @llvm.fabs().
 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
@@ -497,14 +543,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
     return RValue::get(Builder.CreateCall(F, {Begin, End}));
   }
-  case Builtin::BI__builtin_trap: {
-    Value *F = CGM.getIntrinsic(Intrinsic::trap);
-    return RValue::get(Builder.CreateCall(F, {}));
-  }
-  case Builtin::BI__debugbreak: {
-    Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
-    return RValue::get(Builder.CreateCall(F, {}));
-  }
+  case Builtin::BI__builtin_trap:
+    return RValue::get(EmitTrapCall(Intrinsic::trap));
+  case Builtin::BI__debugbreak:
+    return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
   case Builtin::BI__builtin_unreachable: {
     if (SanOpts.has(SanitizerKind::Unreachable)) {
       SanitizerScope SanScope(this);
@@ -1057,58 +1099,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   case Builtin::BI__sync_val_compare_and_swap_2:
   case Builtin::BI__sync_val_compare_and_swap_4:
   case Builtin::BI__sync_val_compare_and_swap_8:
-  case Builtin::BI__sync_val_compare_and_swap_16: {
-    QualType T = E->getType();
-    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
-    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
-
-    llvm::IntegerType *IntType =
-      llvm::IntegerType::get(getLLVMContext(),
-                             getContext().getTypeSize(T));
-    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
-
-    Value *Args[3];
-    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
-    Args[1] = EmitScalarExpr(E->getArg(1));
-    llvm::Type *ValueType = Args[1]->getType();
-    Args[1] = EmitToInt(*this, Args[1], T, IntType);
-    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
-
-    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
-                                                llvm::SequentiallyConsistent,
-                                                llvm::SequentiallyConsistent);
-    Result = Builder.CreateExtractValue(Result, 0);
-    Result = EmitFromInt(*this, Result, T, ValueType);
-    return RValue::get(Result);
-  }
+  case Builtin::BI__sync_val_compare_and_swap_16:
+    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
 
   case Builtin::BI__sync_bool_compare_and_swap_1:
   case Builtin::BI__sync_bool_compare_and_swap_2:
   case Builtin::BI__sync_bool_compare_and_swap_4:
   case Builtin::BI__sync_bool_compare_and_swap_8:
-  case Builtin::BI__sync_bool_compare_and_swap_16: {
-    QualType T = E->getArg(1)->getType();
-    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
-    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
-
-    llvm::IntegerType *IntType =
-      llvm::IntegerType::get(getLLVMContext(),
-                             getContext().getTypeSize(T));
-    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
-
-    Value *Args[3];
-    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
-    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
-    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);
-
-    Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
-                                              llvm::SequentiallyConsistent,
-                                              llvm::SequentiallyConsistent);
-    Value *Result = Builder.CreateExtractValue(Pair, 1);
-    // zext bool to int.
-    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
-    return RValue::get(Result);
-  }
+  case Builtin::BI__sync_bool_compare_and_swap_16:
+    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
 
   case Builtin::BI__sync_swap_1:
   case Builtin::BI__sync_swap_2:
@@ -1880,6 +1879,9 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
     return EmitAMDGPUBuiltinExpr(BuiltinID, E);
   case llvm::Triple::systemz:
     return EmitSystemZBuiltinExpr(BuiltinID, E);
+  case llvm::Triple::nvptx:
+  case llvm::Triple::nvptx64:
+    return EmitNVPTXBuiltinExpr(BuiltinID, E);
   default:
     return nullptr;
   }
@@ -3339,6 +3341,42 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
   return Builder.CreateCall(F, { Metadata, ArgValue });
 }
 
+/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
+/// argument that specifies the vector type.
+static bool HasExtraNeonArgument(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default: break;
+  case NEON::BI__builtin_neon_vget_lane_i8:
+  case NEON::BI__builtin_neon_vget_lane_i16:
+  case NEON::BI__builtin_neon_vget_lane_i32:
+  case NEON::BI__builtin_neon_vget_lane_i64:
+  case NEON::BI__builtin_neon_vget_lane_f32:
+  case NEON::BI__builtin_neon_vgetq_lane_i8:
+  case NEON::BI__builtin_neon_vgetq_lane_i16:
+  case NEON::BI__builtin_neon_vgetq_lane_i32:
+  case NEON::BI__builtin_neon_vgetq_lane_i64:
+  case NEON::BI__builtin_neon_vgetq_lane_f32:
+  case NEON::BI__builtin_neon_vset_lane_i8:
+  case NEON::BI__builtin_neon_vset_lane_i16:
+  case NEON::BI__builtin_neon_vset_lane_i32:
+  case NEON::BI__builtin_neon_vset_lane_i64:
+  case NEON::BI__builtin_neon_vset_lane_f32:
+  case NEON::BI__builtin_neon_vsetq_lane_i8:
+  case NEON::BI__builtin_neon_vsetq_lane_i16:
+  case NEON::BI__builtin_neon_vsetq_lane_i32:
+  case NEON::BI__builtin_neon_vsetq_lane_i64:
+  case NEON::BI__builtin_neon_vsetq_lane_f32:
+  case NEON::BI__builtin_neon_vsha1h_u32:
+  case NEON::BI__builtin_neon_vsha1cq_u32:
+  case NEON::BI__builtin_neon_vsha1pq_u32:
+  case NEON::BI__builtin_neon_vsha1mq_u32:
+  case ARM::BI_MoveToCoprocessor:
+  case ARM::BI_MoveToCoprocessor2:
+    return false;
+  }
+  return true;
+}
+
 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
   if (auto Hint = GetValueForARMHint(BuiltinID))
@@ -3591,7 +3629,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
 
   SmallVector<Value*, 4> Ops;
   llvm::Value *Align = nullptr;
-  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
+  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
+  for (unsigned i = 0, e = NumArgs; i != e; i++) {
     if (i == 0) {
       switch (BuiltinID) {
       case NEON::BI__builtin_neon_vld1_v:
@@ -3666,8 +3706,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
 
   switch (BuiltinID) {
   default: break;
-  // vget_lane and vset_lane are not overloaded and do not have an extra
-  // argument that specifies the vector type.
+
   case NEON::BI__builtin_neon_vget_lane_i8:
   case NEON::BI__builtin_neon_vget_lane_i16:
   case NEON::BI__builtin_neon_vget_lane_i32:
@@ -3678,8 +3717,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vgetq_lane_i32:
   case NEON::BI__builtin_neon_vgetq_lane_i64:
   case NEON::BI__builtin_neon_vgetq_lane_f32:
-    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
-                                        "vget_lane");
+    return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
+
   case NEON::BI__builtin_neon_vset_lane_i8:
   case NEON::BI__builtin_neon_vset_lane_i16:
   case NEON::BI__builtin_neon_vset_lane_i32:
@@ -3690,29 +3729,34 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vsetq_lane_i32:
   case NEON::BI__builtin_neon_vsetq_lane_i64:
   case NEON::BI__builtin_neon_vsetq_lane_f32:
-    Ops.push_back(EmitScalarExpr(E->getArg(2)));
     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
 
-  // Non-polymorphic crypto instructions also not overloaded
   case NEON::BI__builtin_neon_vsha1h_u32:
-    Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                         "vsha1h");
   case NEON::BI__builtin_neon_vsha1cq_u32:
-    Ops.push_back(EmitScalarExpr(E->getArg(2)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                         "vsha1h");
   case NEON::BI__builtin_neon_vsha1pq_u32:
-    Ops.push_back(EmitScalarExpr(E->getArg(2)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                         "vsha1h");
   case NEON::BI__builtin_neon_vsha1mq_u32:
-    Ops.push_back(EmitScalarExpr(E->getArg(2)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                         "vsha1h");
+
+  // The ARM _MoveToCoprocessor builtins put the input register value as
+  // the first argument, but the LLVM intrinsic expects it as the third one.
+  case ARM::BI_MoveToCoprocessor:
+  case ARM::BI_MoveToCoprocessor2: {
+    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 
+                                   Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
+    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
+                                  Ops[3], Ops[4], Ops[5]});
+  }
   }
 
   // Get the last argument, which specifies the vector type.
+  assert(HasExtraArg);
   llvm::APSInt Result;
   const Expr *Arg = E->getArg(E->getNumArgs()-1);
   if (!Arg->isIntegerConstantExpr(Result, getContext()))
@@ -6053,6 +6097,83 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
 
   switch (BuiltinID) {
   default: return nullptr;
+  case X86::BI__builtin_cpu_supports: {
+    const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
+    StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
+
+    // TODO: When/if this becomes more than x86 specific then use a TargetInfo
+    // based mapping.
+    // Processor features and mapping to processor feature value.
+    enum X86Features {
+      CMOV = 0,
+      MMX,
+      POPCNT,
+      SSE,
+      SSE2,
+      SSE3,
+      SSSE3,
+      SSE4_1,
+      SSE4_2,
+      AVX,
+      AVX2,
+      SSE4_A,
+      FMA4,
+      XOP,
+      FMA,
+      AVX512F,
+      BMI,
+      BMI2,
+      MAX
+    };
+
+    X86Features Feature = StringSwitch<X86Features>(FeatureStr)
+                              .Case("cmov", X86Features::CMOV)
+                              .Case("mmx", X86Features::MMX)
+                              .Case("popcnt", X86Features::POPCNT)
+                              .Case("sse", X86Features::SSE)
+                              .Case("sse2", X86Features::SSE2)
+                              .Case("sse3", X86Features::SSE3)
+                              .Case("sse4.1", X86Features::SSE4_1)
+                              .Case("sse4.2", X86Features::SSE4_2)
+                              .Case("avx", X86Features::AVX)
+                              .Case("avx2", X86Features::AVX2)
+                              .Case("sse4a", X86Features::SSE4_A)
+                              .Case("fma4", X86Features::FMA4)
+                              .Case("xop", X86Features::XOP)
+                              .Case("fma", X86Features::FMA)
+                              .Case("avx512f", X86Features::AVX512F)
+                              .Case("bmi", X86Features::BMI)
+                              .Case("bmi2", X86Features::BMI2)
+                              .Default(X86Features::MAX);
+    assert(Feature != X86Features::MAX && "Invalid feature!");
+
+    // Matching the struct layout from the compiler-rt/libgcc structure that is
+    // filled in:
+    // unsigned int __cpu_vendor;
+    // unsigned int __cpu_type;
+    // unsigned int __cpu_subtype;
+    // unsigned int __cpu_features[1];
+    llvm::Type *STy = llvm::StructType::get(
+        Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
+
+    // Grab the global __cpu_model.
+    llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+
+    // Grab the first (0th) element from the field __cpu_features off of the
+    // global in the struct STy.
+    Value *Idxs[] = {
+      ConstantInt::get(Int32Ty, 0),
+      ConstantInt::get(Int32Ty, 3),
+      ConstantInt::get(Int32Ty, 0)
+    };
+    Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
+    Value *Features = Builder.CreateLoad(CpuFeatures);
+
+    // Check the value of the bit corresponding to the feature requested.
+    Value *Bitset = Builder.CreateAnd(
+        Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
+    return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
+  }
   case X86::BI_mm_prefetch: {
     Value *Address = EmitScalarExpr(E->getArg(0));
     Value *RW = ConstantInt::get(Int32Ty, 0);
@@ -6512,6 +6633,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     llvm::Function *F = CGM.getIntrinsic(ID);
     return Builder.CreateCall(F, Ops, "");
   }
+  case PPC::BI__builtin_vsx_xvrspip:
+  case PPC::BI__builtin_vsx_xvrdpip:
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    ID = Intrinsic::ceil;
+    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
+    return Builder.CreateCall(F, X);
   }
 }
 
@@ -6859,3 +6987,72 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
     return nullptr;
   }
 }
+
+Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
+                                             const CallExpr *E) {
+  switch (BuiltinID) {
+  case NVPTX::BI__nvvm_atom_add_gen_i:
+  case NVPTX::BI__nvvm_atom_add_gen_l:
+  case NVPTX::BI__nvvm_atom_add_gen_ll:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
+
+  case NVPTX::BI__nvvm_atom_sub_gen_i:
+  case NVPTX::BI__nvvm_atom_sub_gen_l:
+  case NVPTX::BI__nvvm_atom_sub_gen_ll:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
+
+  case NVPTX::BI__nvvm_atom_and_gen_i:
+  case NVPTX::BI__nvvm_atom_and_gen_l:
+  case NVPTX::BI__nvvm_atom_and_gen_ll:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
+
+  case NVPTX::BI__nvvm_atom_or_gen_i:
+  case NVPTX::BI__nvvm_atom_or_gen_l:
+  case NVPTX::BI__nvvm_atom_or_gen_ll:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
+
+  case NVPTX::BI__nvvm_atom_xor_gen_i:
+  case NVPTX::BI__nvvm_atom_xor_gen_l:
+  case NVPTX::BI__nvvm_atom_xor_gen_ll:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
+
+  case NVPTX::BI__nvvm_atom_xchg_gen_i:
+  case NVPTX::BI__nvvm_atom_xchg_gen_l:
+  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
+
+  case NVPTX::BI__nvvm_atom_max_gen_i:
+  case NVPTX::BI__nvvm_atom_max_gen_l:
+  case NVPTX::BI__nvvm_atom_max_gen_ll:
+  case NVPTX::BI__nvvm_atom_max_gen_ui:
+  case NVPTX::BI__nvvm_atom_max_gen_ul:
+  case NVPTX::BI__nvvm_atom_max_gen_ull:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
+
+  case NVPTX::BI__nvvm_atom_min_gen_i:
+  case NVPTX::BI__nvvm_atom_min_gen_l:
+  case NVPTX::BI__nvvm_atom_min_gen_ll:
+  case NVPTX::BI__nvvm_atom_min_gen_ui:
+  case NVPTX::BI__nvvm_atom_min_gen_ul:
+  case NVPTX::BI__nvvm_atom_min_gen_ull:
+    return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
+
+  case NVPTX::BI__nvvm_atom_cas_gen_i:
+  case NVPTX::BI__nvvm_atom_cas_gen_l:
+  case NVPTX::BI__nvvm_atom_cas_gen_ll:
+    return MakeAtomicCmpXchgValue(*this, E, true);
+
+  case NVPTX::BI__nvvm_atom_add_gen_f: {
+    Value *Ptr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    // atomicrmw only deals with integer arguments so we need to use
+    // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
+    Value *FnALAF32 =
+        CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
+    return Builder.CreateCall(FnALAF32, {Ptr, Val});
+  }
+
+  default:
+    return nullptr;
+  }
+}
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index cb7e6df..dc16616 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -130,10 +130,9 @@ CGCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) {
   return GetBogusMemberPointer(QualType(MPT, 0));
 }
 
-llvm::Constant *CGCXXABI::EmitMemberPointer(const CXXMethodDecl *MD) {
-  return GetBogusMemberPointer(
-                         CGM.getContext().getMemberPointerType(MD->getType(),
-                                         MD->getParent()->getTypeForDecl()));
+llvm::Constant *CGCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
+  return GetBogusMemberPointer(CGM.getContext().getMemberPointerType(
+      MD->getType(), MD->getParent()->getTypeForDecl()));
 }
 
 llvm::Constant *CGCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index b6b4ee6..436b96a 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -172,7 +172,7 @@ public:
   virtual llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT);
 
   /// Create a member pointer for the given method.
-  virtual llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD);
+  virtual llvm::Constant *EmitMemberFunctionPointer(const CXXMethodDecl *MD);
 
   /// Create a member pointer for the given field.
   virtual llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 58ef171..0535c05 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -32,7 +32,6 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include <sstream>
 using namespace clang;
 using namespace CodeGen;
 
@@ -1453,6 +1452,8 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
     // Attributes that should go on the call site only.
     if (!CodeGenOpts.SimplifyLibCalls)
       FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
+    if (!CodeGenOpts.TrapFuncName.empty())
+      FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
   } else {
     // Attributes that should go on the function, but not the call site.
     if (!CodeGenOpts.DisableFPElim) {
@@ -1493,12 +1494,16 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
     // avoid putting features in the target-features set if we know it'll be
     // one of the default features in the backend, e.g. corei7-avx and +avx or
     // figure out non-explicit dependencies.
-    std::vector<std::string> Features(getTarget().getTargetOpts().Features);
+    // Canonicalize the existing features in a new feature map.
+    // TODO: Migrate the existing backends to keep the map around rather than
+    // the vector.
+    llvm::StringMap<bool> FeatureMap;
+    for (auto F : getTarget().getTargetOpts().Features) {
+      const char *Name = F.c_str();
+      bool Enabled = Name[0] == '+';
+      getTarget().setFeatureEnabled(FeatureMap, Name + 1, Enabled);
+    }
 
-    // TODO: The target attribute complicates this further by allowing multiple
-    // additional features to be tacked on to the feature string for a
-    // particular function. For now we simply append to the set of features and
-    // let backend resolution fix them up.
     const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
     if (FD) {
       if (const TargetAttr *TD = FD->getAttr<TargetAttr>()) {
@@ -1507,7 +1512,7 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
         FeaturesStr.split(AttrFeatures, ",");
 
         // Grab the various features and prepend a "+" to turn on the feature to
-        // the backend and add them to our existing set of Features.
+        // the backend and add them to our existing set of features.
         for (auto &Feature : AttrFeatures) {
           // While we're here iterating check for a different target cpu.
           if (Feature.startswith("arch="))
@@ -1521,24 +1526,28 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
 	    // attributes on the function.
 	    ;
 	  else if (Feature.startswith("mno-"))
-            Features.push_back("-" + Feature.split("-").second.str());
+            getTarget().setFeatureEnabled(FeatureMap, Feature.split("-").second,
+                                          false);
           else
-            Features.push_back("+" + Feature.str());
-	}
+            getTarget().setFeatureEnabled(FeatureMap, Feature, true);
+        }
       }
     }
 
+    // Produce the canonical string for this set of features.
+    std::vector<std::string> Features;
+    for (llvm::StringMap<bool>::const_iterator it = FeatureMap.begin(),
+                                               ie = FeatureMap.end();
+         it != ie; ++it)
+      Features.push_back((it->second ? "+" : "-") + it->first().str());
+
     // Now add the target-cpu and target-features to the function.
     if (TargetCPU != "")
       FuncAttrs.addAttribute("target-cpu", TargetCPU);
     if (!Features.empty()) {
-      std::stringstream TargetFeatures;
-      std::copy(Features.begin(), Features.end(),
-                std::ostream_iterator<std::string>(TargetFeatures, ","));
-
-      // The drop_back gets rid of the trailing space.
+      std::sort(Features.begin(), Features.end());
       FuncAttrs.addAttribute("target-features",
-                             StringRef(TargetFeatures.str()).drop_back(1));
+                             llvm::join(Features.begin(), Features.end(), ","));
     }
   }
 
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 1d2b787..62df982 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -29,13 +29,12 @@
 using namespace clang;
 using namespace CodeGen;
 
-static CharUnits
-ComputeNonVirtualBaseClassOffset(ASTContext &Context,
-                                 const CXXRecordDecl *DerivedClass,
-                                 CastExpr::path_const_iterator Start,
-                                 CastExpr::path_const_iterator End) {
+CharUnits CodeGenModule::computeNonVirtualBaseClassOffset(
+    const CXXRecordDecl *DerivedClass, CastExpr::path_const_iterator Start,
+    CastExpr::path_const_iterator End) {
   CharUnits Offset = CharUnits::Zero();
 
+  const ASTContext &Context = getContext();
   const CXXRecordDecl *RD = DerivedClass;
 
   for (CastExpr::path_const_iterator I = Start; I != End; ++I) {
@@ -64,8 +63,7 @@ CodeGenModule::GetNonVirtualBaseClassOffset(const CXXRecordDecl *ClassDecl,
   assert(PathBegin != PathEnd && "Base path should not be empty!");
 
   CharUnits Offset =
-    ComputeNonVirtualBaseClassOffset(getContext(), ClassDecl,
-                                     PathBegin, PathEnd);
+      computeNonVirtualBaseClassOffset(ClassDecl, PathBegin, PathEnd);
   if (Offset.isZero())
     return nullptr;
 
@@ -158,9 +156,8 @@ llvm::Value *CodeGenFunction::GetAddressOfBaseClass(
   // Compute the static offset of the ultimate destination within its
   // allocating subobject (the virtual base, if there is one, or else
   // the "complete" object that we see).
-  CharUnits NonVirtualOffset =
-    ComputeNonVirtualBaseClassOffset(getContext(), VBase ? VBase : Derived,
-                                     Start, PathEnd);
+  CharUnits NonVirtualOffset = CGM.computeNonVirtualBaseClassOffset(
+      VBase ? VBase : Derived, Start, PathEnd);
 
   // If there's a virtual step, we can sometimes "devirtualize" it.
   // For now, that's limited to when the derived type is final.
@@ -1342,6 +1339,11 @@ FieldHasTrivialDestructorBody(ASTContext &Context,
     return true;
 
   CXXRecordDecl *FieldClassDecl = cast<CXXRecordDecl>(RT->getDecl());
+
+  // The destructor for an implicit anonymous union member is never invoked.
+  if (FieldClassDecl->isUnion() && FieldClassDecl->isAnonymousStructOrUnion())
+    return false;
+
   return HasTrivialDestructorBody(Context, FieldClassDecl, FieldClassDecl);
 }
 
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index a20e39f..8c4b4b3 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -27,6 +27,8 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Version.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/PreprocessorOptions.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Constants.h"
@@ -1179,12 +1181,13 @@ void CGDebugInfo::CollectCXXMemberFunctions(
     // the member being added to type units by LLVM, while still allowing it
     // to be emitted into the type declaration/reference inside the compile
     // unit.
+    // Ditto 'nodebug' methods, for consistency with CodeGenFunction.cpp.
     // FIXME: Handle Using(Shadow?)Decls here to create
     // DW_TAG_imported_declarations inside the class for base decls brought into
     // derived classes. GDB doesn't seem to notice/leverage these when I tried
     // it, so I'm not rushing to fix this. (GCC seems to produce them, if
     // referenced)
-    if (!Method || Method->isImplicit())
+    if (!Method || Method->isImplicit() || Method->hasAttr<NoDebugAttr>())
       continue;
 
     if (Method->getType()->getAs<FunctionProtoType>()->getContainedAutoType())
@@ -1279,7 +1282,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
       // Member function pointers have special support for building them, though
       // this is currently unsupported in LLVM CodeGen.
       else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance())
-        V = CGM.getCXXABI().EmitMemberPointer(MD);
+        V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
       else if (const auto *FD = dyn_cast<FunctionDecl>(D))
         V = CGM.GetAddrOfFunction(FD);
       // Member data pointers have special handling too to compute the fixed
@@ -1660,6 +1663,47 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty,
   return CreateTypeDefinition(Ty, Unit);
 }
 
+llvm::DIModule *
+CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod) {
+  auto it = ModuleRefCache.find(Mod.Signature);
+  if (it != ModuleRefCache.end())
+    return it->second;
+
+  // Macro definitions that were defined with "-D" on the command line.
+  SmallString<128> ConfigMacros;
+  {
+    llvm::raw_svector_ostream OS(ConfigMacros);
+    const auto &PPOpts = CGM.getPreprocessorOpts();
+    unsigned I = 0;
+    // Translate the macro definitions back into a commmand line.
+    for (auto &M : PPOpts.Macros) {
+      if (++I > 1)
+        OS << " ";
+      const std::string &Macro = M.first;
+      bool Undef = M.second;
+      OS << "\"-" << (Undef ? 'U' : 'D');
+      for (char c : Macro)
+        switch (c) {
+        case '\\' : OS << "\\\\"; break;
+        case '"'  : OS << "\\\""; break;
+        default: OS << c;
+        }
+      OS << '\"';
+    }
+  }
+  llvm::DIBuilder DIB(CGM.getModule());
+  auto *CU = DIB.createCompileUnit(
+      TheCU->getSourceLanguage(), internString(Mod.ModuleName),
+      internString(Mod.Path), TheCU->getProducer(), true, StringRef(), 0,
+      internString(Mod.ASTFile), llvm::DIBuilder::FullDebug, Mod.Signature);
+  llvm::DIModule *ModuleRef =
+      DIB.createModule(CU, Mod.ModuleName, ConfigMacros, internString(Mod.Path),
+                       internString(CGM.getHeaderSearchOpts().Sysroot));
+  DIB.finalize();
+  ModuleRefCache.insert(std::make_pair(Mod.Signature, ModuleRef));
+  return ModuleRef;
+}
+
 llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
                                                 llvm::DIFile *Unit) {
   ObjCInterfaceDecl *ID = Ty->getDecl();
@@ -3303,6 +3347,15 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) {
         getLineNumber(USD.getLocation()));
 }
 
+void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) {
+  auto *Reader = CGM.getContext().getExternalSource();
+  auto Info = Reader->getSourceDescriptor(*ID.getImportedModule());
+  DBuilder.createImportedDeclaration(
+    getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())),
+                                getOrCreateModuleRef(Info),
+                                getLineNumber(ID.getLocation()));
+}
+
 llvm::DIImportedEntity *
 CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
   if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 8509e07..10d3b0d 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -83,6 +83,9 @@ class CGDebugInfo {
   /// which may change.
   llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache;
 
+  /// \brief Cache of references to AST files such as PCHs or modules.
+  llvm::DenseMap<uint64_t, llvm::DIModule *> ModuleRefCache;
+
   /// \brief list of interfaces we want to keep even if orphaned.
   std::vector<void *> RetainedTypes;
 
@@ -289,6 +292,9 @@ public:
   /// \brief Emit C++ using declaration.
   void EmitUsingDecl(const UsingDecl &UD);
 
+  /// \brief Emit an @import declaration.
+  void EmitImportDecl(const ImportDecl &ID);
+
   /// \brief Emit C++ namespace alias.
   llvm::DIImportedEntity *EmitNamespaceAlias(const NamespaceAliasDecl &NA);
 
@@ -344,6 +350,10 @@ private:
   /// necessary.
   llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg);
 
+  /// \brief Get a reference to a clang module.
+  llvm::DIModule *
+  getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod);
+
   /// \brief Get the type from the cache or create a new
   /// partial type if necessary.
   llvm::DIType *getOrCreateLimitedType(const RecordType *Ty, llvm::DIFile *F);
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 07dbce4..839c2e4 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -579,9 +579,9 @@ static bool isAccessedBy(const VarDecl &var, const Stmt *s) {
     }
   }
 
-  for (Stmt::const_child_range children = s->children(); children; ++children)
-    // children might be null; as in missing decl or conditional of an if-stmt.
-    if ((*children) && isAccessedBy(var, *children))
+  for (const Stmt *SubStmt : s->children())
+    // SubStmt might be null; as in missing decl or conditional of an if-stmt.
+    if (SubStmt && isAccessedBy(var, SubStmt))
       return true;
 
   return false;
@@ -1074,8 +1074,8 @@ static bool isCapturedBy(const VarDecl &var, const Expr *e) {
     return false;
   }
 
-  for (Stmt::const_child_range children = e->children(); children; ++children)
-    if (isCapturedBy(var, cast<Expr>(*children)))
+  for (const Stmt *SubStmt : e->children())
+    if (isCapturedBy(var, cast<Expr>(SubStmt)))
       return true;
 
   return false;
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 1a76afa..175763c 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -2403,8 +2403,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) {
     TrapBB = createBasicBlock("trap");
     Builder.CreateCondBr(Checked, Cont, TrapBB);
     EmitBlock(TrapBB);
-    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::trap);
-    llvm::CallInst *TrapCall = Builder.CreateCall(F, {});
+    llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     Builder.CreateUnreachable();
@@ -2415,6 +2414,18 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) {
   EmitBlock(Cont);
 }
 
+llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) {
+  llvm::CallInst *TrapCall =
+      Builder.CreateCall(CGM.getIntrinsic(IntrID), {});
+
+  if (!CGM.getCodeGenOpts().TrapFuncName.empty())
+    TrapCall->addAttribute(llvm::AttributeSet::FunctionIndex,
+                           "trap-func-name",
+                           CGM.getCodeGenOpts().TrapFuncName);
+
+  return TrapCall;
+}
+
 /// isSimpleArrayDecayOperand - If the specified expr is a simple decay from an
 /// array to pointer, return the array subexpression.
 static const Expr *isSimpleArrayDecayOperand(const Expr *E) {
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index f0f706d..c7adcca 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -1548,7 +1548,8 @@ namespace {
         // The size of an element, multiplied by the number of elements.
         llvm::Value *Size
           = llvm::ConstantInt::get(SizeTy, ElementTypeSize.getQuantity());
-        Size = CGF.Builder.CreateMul(Size, NumElements);
+        if (NumElements)
+          Size = CGF.Builder.CreateMul(Size, NumElements);
 
         // Plus the size of the cookie if applicable.
         if (!CookieSize.isZero()) {
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index acfb9b6..a15c151 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -1452,7 +1452,7 @@ CodeGenModule::getMemberPointerConstant(const UnaryOperator *uo) {
 
   // A member function pointer.
   if (const CXXMethodDecl *method = dyn_cast<CXXMethodDecl>(decl))
-    return getCXXABI().EmitMemberPointer(method);
+    return getCXXABI().EmitMemberFunctionPointer(method);
 
   // Otherwise, a member data pointer.
   uint64_t fieldOffset = getContext().getFieldOffset(decl);
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 330a0df..c73f118 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -16,6 +16,7 @@
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/RecordLayout.h"
@@ -2037,6 +2038,13 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
 
       return size;
     }
+  } else if (E->getKind() == UETT_OpenMPRequiredSimdAlign) {
+    auto Alignment =
+        CGF.getContext()
+            .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
+                E->getTypeOfArgument()->getPointeeType()))
+            .getQuantity();
+    return llvm::ConstantInt::get(CGF.SizeTy, Alignment);
   }
 
   // If this isn't sizeof(vla), the result must be constant; use the constant
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index 9981fcc..747326e 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -55,13 +55,15 @@ llvm::Value *CodeGenFunction::EmitObjCStringLiteral(const ObjCStringLiteral *E)
 
 /// EmitObjCBoxedExpr - This routine generates code to call
 /// the appropriate expression boxing method. This will either be
-/// one of +[NSNumber numberWith<Type>:], or +[NSString stringWithUTF8String:].
+/// one of +[NSNumber numberWith<Type>:], or +[NSString stringWithUTF8String:],
+/// or [NSValue valueWithBytes:objCType:].
 ///
 llvm::Value *
 CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) {
   // Generate the correct selector for this literal's concrete type.
   // Get the method.
   const ObjCMethodDecl *BoxingMethod = E->getBoxingMethod();
+  const Expr *SubExpr = E->getSubExpr();
   assert(BoxingMethod && "BoxingMethod is null");
   assert(BoxingMethod->isClassMethod() && "BoxingMethod must be a class method");
   Selector Sel = BoxingMethod->getSelector();
@@ -74,7 +76,35 @@ CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) {
   llvm::Value *Receiver = Runtime.GetClass(*this, ClassDecl);
 
   CallArgList Args;
-  EmitCallArgs(Args, BoxingMethod, E->arg_begin(), E->arg_end());
+  const ParmVarDecl *ArgDecl = *BoxingMethod->param_begin();
+  QualType ArgQT = ArgDecl->getType().getUnqualifiedType();
+  
+  // ObjCBoxedExpr supports boxing of structs and unions 
+  // via [NSValue valueWithBytes:objCType:]
+  const QualType ValueType(SubExpr->getType().getCanonicalType());
+  if (ValueType->isObjCBoxableRecordType()) {
+    // Emit CodeGen for first parameter
+    // and cast value to correct type
+    llvm::Value *Temporary = CreateMemTemp(SubExpr->getType());
+    EmitAnyExprToMem(SubExpr, Temporary, Qualifiers(), /*isInit*/ true);
+    llvm::Value *BitCast = Builder.CreateBitCast(Temporary,
+                                                 ConvertType(ArgQT));
+    Args.add(RValue::get(BitCast), ArgQT);
+
+    // Create char array to store type encoding
+    std::string Str;
+    getContext().getObjCEncodingForType(ValueType, Str);
+    llvm::GlobalVariable *GV = CGM.GetAddrOfConstantCString(Str);
+    
+    // Cast type encoding to correct type
+    const ParmVarDecl *EncodingDecl = BoxingMethod->parameters()[1];
+    QualType EncodingQT = EncodingDecl->getType().getUnqualifiedType();
+    llvm::Value *Cast = Builder.CreateBitCast(GV, ConvertType(EncodingQT));
+
+    Args.add(RValue::get(Cast), EncodingQT);
+  } else {
+    Args.add(EmitAnyExpr(SubExpr), ArgQT);
+  }
 
   RValue result = Runtime.GenerateMessageSend(
       *this, ReturnValueSlot(), BoxingMethod->getReturnType(), Sel, Receiver,
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 3161af3..534d148 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -45,14 +45,14 @@ public:
 
   CGOpenMPRegionInfo(const CapturedStmt &CS,
                      const CGOpenMPRegionKind RegionKind,
-                     const RegionCodeGenTy &CodeGen)
+                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
-        CodeGen(CodeGen) {}
+        CodeGen(CodeGen), Kind(Kind) {}
 
   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
-                     const RegionCodeGenTy &CodeGen)
-      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
-        CodeGen(CodeGen) {}
+                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
+      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
+        Kind(Kind) {}
 
   /// \brief Get a variable or parameter for storing global thread id
   /// inside OpenMP construct.
@@ -67,6 +67,8 @@ public:
 
   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
 
+  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
+
   static bool classof(const CGCapturedStmtInfo *Info) {
     return Info->getKind() == CR_OpenMP;
   }
@@ -74,14 +76,16 @@ public:
 protected:
   CGOpenMPRegionKind RegionKind;
   const RegionCodeGenTy &CodeGen;
+  OpenMPDirectiveKind Kind;
 };
 
 /// \brief API for captured statement code generation in OpenMP constructs.
 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
 public:
   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
-                             const RegionCodeGenTy &CodeGen)
-      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
+                             const RegionCodeGenTy &CodeGen,
+                             OpenMPDirectiveKind Kind)
+      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind),
         ThreadIDVar(ThreadIDVar) {
     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
   }
@@ -109,8 +113,9 @@ class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
 public:
   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                  const VarDecl *ThreadIDVar,
-                                 const RegionCodeGenTy &CodeGen)
-      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
+                                 const RegionCodeGenTy &CodeGen,
+                                 OpenMPDirectiveKind Kind)
+      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind),
         ThreadIDVar(ThreadIDVar) {
     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
   }
@@ -141,8 +146,9 @@ private:
 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
 public:
   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
-                            const RegionCodeGenTy &CodeGen)
-      : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
+                            const RegionCodeGenTy &CodeGen,
+                            OpenMPDirectiveKind Kind)
+      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind), OldCSI(OldCSI),
         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
   // \brief Retrieve the value of the context parameter.
   llvm::Value *getContextValue() const override {
@@ -207,11 +213,12 @@ public:
   /// \param CodeGen Code generation sequence for combined directives. Includes
   /// a list of functions used for code generation of implicitly inlined
   /// regions.
-  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
+  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
+                          OpenMPDirectiveKind Kind)
       : CGF(CGF) {
     // Start emission for the construct.
     CGF.CapturedStmtInfo =
-        new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
+        new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, Kind);
   }
   ~InlinedOpenMPRegionRAII() {
     // Restore original CapturedStmtInfo only if we're done with code emission.
@@ -273,29 +280,28 @@ void CGOpenMPRuntime::clear() {
   InternalVars.clear();
 }
 
-llvm::Value *
-CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
-                                              const VarDecl *ThreadIDVar,
-                                              const RegionCodeGenTy &CodeGen) {
+llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
   assert(ThreadIDVar->getType()->isPointerType() &&
          "thread id variable must be of type kmp_int32 *");
   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
-  CGF.CapturedStmtInfo = &CGInfo;
+  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind);
+  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
   return CGF.GenerateCapturedStmtFunction(*CS);
 }
 
-llvm::Value *
-CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
-                                          const VarDecl *ThreadIDVar,
-                                          const RegionCodeGenTy &CodeGen) {
+llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
   assert(!ThreadIDVar->getType()->isPointerType() &&
          "thread id variable must be of type kmp_int32 for tasks");
   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
-  CGF.CapturedStmtInfo = &CGInfo;
+  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
+                                        InnermostKind);
+  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
   return CGF.GenerateCapturedStmtFunction(*CS);
 }
 
@@ -530,6 +536,14 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
     break;
   }
+  case OMPRTL__kmpc_barrier: {
+    // Build void __kmpc_cancel_barrier(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+    break;
+  }
   case OMPRTL__kmpc_for_static_fini: {
     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
@@ -781,6 +795,40 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
     break;
   }
+  case OMPRTL__kmpc_omp_task_with_deps: {
+    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
+    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
+        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
+    break;
+  }
+  case OMPRTL__kmpc_omp_wait_deps: {
+    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
+    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
+    // kmp_depend_info_t *noalias_dep_list);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.Int32Ty,           CGM.VoidPtrTy,
+                                CGM.Int32Ty,           CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
+    break;
+  }
+  case OMPRTL__kmpc_cancellationpoint: {
+    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
+    // global_tid, kmp_int32 cncl_kind)
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -1212,11 +1260,12 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
         llvm::makeArrayRef(Args));
-    emitInlinedDirective(CGF, CriticalOpGen);
+    emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
   }
 }
 
 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
+                       OpenMPDirectiveKind Kind,
                        const RegionCodeGenTy &BodyOpGen) {
   llvm::Value *CallBool = CGF.EmitScalarConversion(
       IfCond,
@@ -1228,7 +1277,7 @@ static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
   // Generate the branch (If-stmt)
   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
   CGF.EmitBlock(ThenBlock);
-  CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
+  CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
   // Emit the rest of bblocks/branches
   CGF.EmitBranch(ContBlock);
   CGF.EmitBlock(ContBlock, true);
@@ -1247,7 +1296,7 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
       MasterCallEndCleanup;
-  emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
+  emitIfStmt(CGF, IsMaster, OMPD_master, [&](CodeGenFunction &CGF) -> void {
     CodeGenFunction::RunCleanupsScope Scope(CGF);
     CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
@@ -1280,7 +1329,7 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
         llvm::makeArrayRef(Args));
-    emitInlinedDirective(CGF, TaskgroupOpGen);
+    emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
   }
 }
 
@@ -1376,7 +1425,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
       SingleCallEndCleanup;
-  emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
+  emitIfStmt(CGF, IsSingle, OMPD_single, [&](CodeGenFunction &CGF) -> void {
     CodeGenFunction::RunCleanupsScope Scope(CGF);
     CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
@@ -1444,13 +1493,15 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
         llvm::makeArrayRef(Args));
-    emitInlinedDirective(CGF, OrderedOpGen);
+    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
   }
 }
 
 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                      OpenMPDirectiveKind Kind) {
+                                      OpenMPDirectiveKind Kind,
+                                      bool CheckForCancel) {
   // Build call __kmpc_cancel_barrier(loc, thread_id);
+  // Build call __kmpc_barrier(loc, thread_id);
   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
   if (Kind == OMPD_for) {
     Flags =
@@ -1466,15 +1517,34 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
   } else {
     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
   }
-  // Build call __kmpc_cancel_barrier(loc, thread_id);
-  // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
-  // one provides the same functionality and adds initial support for
-  // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
-  // is provided default by the runtime library so it safe to make such
-  // replacement.
+  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
+  // thread_id);
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                          getThreadID(CGF, Loc)};
-  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
+  if (auto *OMPRegionInfo =
+          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+    auto CancelDestination =
+        CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
+    if (CancelDestination.isValid()) {
+      auto *Result = CGF.EmitRuntimeCall(
+          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
+      if (CheckForCancel) {
+        // if (__kmpc_cancel_barrier()) {
+        //   exit from construct;
+        // }
+        auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
+        auto *ContBB = CGF.createBasicBlock(".cancel.continue");
+        auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
+        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
+        CGF.EmitBlock(ExitBB);
+        //   exit from construct;
+        CGF.EmitBranchThroughCleanup(CancelDestination);
+        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
+      }
+      return;
+    }
+  }
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
 }
 
 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
@@ -2009,11 +2079,12 @@ void CGOpenMPRuntime::emitTaskCall(
     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
-    const Expr *IfCond, const ArrayRef<const Expr *> PrivateVars,
-    const ArrayRef<const Expr *> PrivateCopies,
-    const ArrayRef<const Expr *> FirstprivateVars,
-    const ArrayRef<const Expr *> FirstprivateCopies,
-    const ArrayRef<const Expr *> FirstprivateInits) {
+    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+    ArrayRef<const Expr *> PrivateCopies,
+    ArrayRef<const Expr *> FirstprivateVars,
+    ArrayRef<const Expr *> FirstprivateCopies,
+    ArrayRef<const Expr *> FirstprivateInits,
+    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
   auto &C = CGM.getContext();
   llvm::SmallVector<PrivateDataTy, 8> Privates;
   // Aggregate privates and sort them by the alignment.
@@ -2169,12 +2240,11 @@ void CGOpenMPRuntime::emitTaskCall(
                     });
                     (void)InitScope.Privatize();
                     // Emit initialization for single element.
-                    auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
-                    CGF.CapturedStmtInfo = &CapturesInfo;
+                    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
+                        CGF, &CapturesInfo);
                     CGF.EmitAnyExprToMem(Init, DestElement,
                                          Init->getType().getQualifiers(),
                                          /*IsInitializer=*/false);
-                    CGF.CapturedStmtInfo = OldCapturedStmtInfo;
                   });
             }
           } else {
@@ -2183,11 +2253,9 @@ void CGOpenMPRuntime::emitTaskCall(
               return SharedRefLValue.getAddress();
             });
             (void)InitScope.Privatize();
-            auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
-            CGF.CapturedStmtInfo = &CapturesInfo;
+            CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                                /*capturedByInit=*/false);
-            CGF.CapturedStmtInfo = OldCapturedStmtInfo;
           }
         } else {
           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
@@ -2209,35 +2277,139 @@ void CGOpenMPRuntime::emitTaskCall(
   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             DestructorFn, KmpRoutineEntryPtrTy),
                         Destructor);
+
+  // Process list of dependences.
+  llvm::Value *DependInfo = nullptr;
+  unsigned DependencesNumber = Dependences.size();
+  if (!Dependences.empty()) {
+    // Dependence kind for RTL.
+    enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
+    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
+    RecordDecl *KmpDependInfoRD;
+    QualType FlagsTy = C.getIntTypeForBitwidth(
+        C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false);
+    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
+    if (KmpDependInfoTy.isNull()) {
+      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
+      KmpDependInfoRD->startDefinition();
+      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
+      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
+      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
+      KmpDependInfoRD->completeDefinition();
+      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
+    } else {
+      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+    }
+    // Define type kmp_depend_info[<Dependences.size()>];
+    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
+        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()),
+        ArrayType::Normal, /*IndexTypeQuals=*/0);
+    // kmp_depend_info[<Dependences.size()>] deps;
+    DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy);
+    for (unsigned i = 0; i < DependencesNumber; ++i) {
+      auto Addr = CGF.EmitLValue(Dependences[i].second);
+      auto *Size = llvm::ConstantInt::get(
+          CGF.SizeTy,
+          C.getTypeSizeInChars(Dependences[i].second->getType()).getQuantity());
+      auto Base = CGF.MakeNaturalAlignAddrLValue(
+          CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i),
+          KmpDependInfoTy);
+      // deps[i].base_addr = &<Dependences[i].second>;
+      auto BaseAddrLVal = CGF.EmitLValueForField(
+          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
+      CGF.EmitStoreOfScalar(
+          CGF.Builder.CreatePtrToInt(Addr.getAddress(), CGF.IntPtrTy),
+          BaseAddrLVal);
+      // deps[i].len = sizeof(<Dependences[i].second>);
+      auto LenLVal = CGF.EmitLValueForField(
+          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
+      CGF.EmitStoreOfScalar(Size, LenLVal);
+      // deps[i].flags = <Dependences[i].first>;
+      RTLDependenceKindTy DepKind;
+      switch (Dependences[i].first) {
+      case OMPC_DEPEND_in:
+        DepKind = DepIn;
+        break;
+      case OMPC_DEPEND_out:
+        DepKind = DepOut;
+        break;
+      case OMPC_DEPEND_inout:
+        DepKind = DepInOut;
+        break;
+      case OMPC_DEPEND_unknown:
+        llvm_unreachable("Unknown task dependence type");
+      }
+      auto FlagsLVal = CGF.EmitLValueForField(
+          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
+      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
+                            FlagsLVal);
+    }
+    DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0),
+        CGF.VoidPtrTy);
+  }
+
   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
   // libcall.
   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
   // *new_task);
+  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
+  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
+  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
+  // list is not empty
   auto *ThreadID = getThreadID(CGF, Loc);
-  llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask};
-  auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) {
+  auto *UpLoc = emitUpdateLocation(CGF, Loc);
+  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
+  llvm::Value *DepTaskArgs[] = {
+      UpLoc,
+      ThreadID,
+      NewTask,
+      DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
+      DependInfo,
+      DependInfo ? CGF.Builder.getInt32(0) : nullptr,
+      DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
+  auto &&ThenCodeGen = [this, DependInfo, &TaskArgs,
+                        &DepTaskArgs](CodeGenFunction &CGF) {
     // TODO: add check for untied tasks.
-    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+    CGF.EmitRuntimeCall(
+        createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps
+                                         : OMPRTL__kmpc_omp_task),
+        DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs));
   };
   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
       IfCallEndCleanup;
-  auto &&ElseCodeGen =
-      [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry](
-          CodeGenFunction &CGF) {
-        CodeGenFunction::RunCleanupsScope LocalScope(CGF);
-        CGF.EmitRuntimeCall(
-            createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs);
-        // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
-        // kmp_task_t *new_task);
-        CGF.EHStack.pushCleanup<IfCallEndCleanup>(
-            NormalAndEHCleanup,
-            createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
-            llvm::makeArrayRef(TaskArgs));
-
-        // Call proxy_task_entry(gtid, new_task);
-        llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
-        CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
-      };
+  llvm::Value *DepWaitTaskArgs[] = {
+      UpLoc,
+      ThreadID,
+      DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
+      DependInfo,
+      DependInfo ? CGF.Builder.getInt32(0) : nullptr,
+      DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
+  auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
+                        DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) {
+    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
+    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
+    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
+    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
+    // is specified.
+    if (DependInfo)
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
+                          DepWaitTaskArgs);
+    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
+    // kmp_task_t *new_task);
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
+                        TaskArgs);
+    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+    // kmp_task_t *new_task);
+    CGF.EHStack.pushCleanup<IfCallEndCleanup>(
+        NormalAndEHCleanup,
+        createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
+        llvm::makeArrayRef(TaskArgs));
+
+    // Call proxy_task_entry(gtid, new_task);
+    llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
+    CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
+  };
   if (IfCond) {
     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
   } else {
@@ -2545,8 +2717,60 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
 }
 
 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
+                                           OpenMPDirectiveKind InnerKind,
                                            const RegionCodeGenTy &CodeGen) {
-  InlinedOpenMPRegionRAII Region(CGF, CodeGen);
+  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind);
   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
 }
 
+void CGOpenMPRuntime::emitCancellationPointCall(
+    CodeGenFunction &CGF, SourceLocation Loc,
+    OpenMPDirectiveKind CancelRegion) {
+  // Build call kmp_int32 OMPRTL__kmpc_cancellationpoint(ident_t *loc, kmp_int32
+  // global_tid, kmp_int32 cncl_kind);
+  enum {
+    CancelNoreq = 0,
+    CancelParallel = 1,
+    CancelLoop = 2,
+    CancelSections = 3,
+    CancelTaskgroup = 4
+  } CancelKind = CancelNoreq;
+  if (CancelRegion == OMPD_parallel)
+    CancelKind = CancelParallel;
+  else if (CancelRegion == OMPD_for)
+    CancelKind = CancelLoop;
+  else if (CancelRegion == OMPD_sections)
+    CancelKind = CancelSections;
+  else {
+    assert(CancelRegion == OMPD_taskgroup);
+    CancelKind = CancelTaskgroup;
+  }
+  if (auto *OMPRegionInfo =
+          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+    auto CancelDest =
+        CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
+    if (CancelDest.isValid()) {
+      llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc),
+                             getThreadID(CGF, Loc),
+                             CGF.Builder.getInt32(CancelKind)};
+      // Ignore return result until untied tasks are supported.
+      auto *Result = CGF.EmitRuntimeCall(
+          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
+      // if (__kmpc_cancellationpoint()) {
+      //  __kmpc_cancel_barrier();
+      //   exit from construct;
+      // }
+      auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
+      auto *ContBB = CGF.createBasicBlock(".cancel.continue");
+      auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
+      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
+      CGF.EmitBlock(ExitBB);
+      // __kmpc_cancel_barrier();
+      emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false);
+      // exit from construct;
+      CGF.EmitBranchThroughCleanup(CancelDest);
+      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
+    }
+  }
+}
+
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index d1efde2..76bb3ae 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -68,6 +68,8 @@ private:
     // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
     // global_tid);
     OMPRTL__kmpc_cancel_barrier,
+    // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+    OMPRTL__kmpc_barrier,
     // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
     OMPRTL__kmpc_for_static_fini,
     // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
@@ -138,6 +140,17 @@ private:
     // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
     // int proc_bind);
     OMPRTL__kmpc_push_proc_bind,
+    // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
+    // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
+    // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
+    OMPRTL__kmpc_omp_task_with_deps,
+    // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
+    // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
+    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
+    OMPRTL__kmpc_omp_wait_deps,
+    // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
+    // global_tid, kmp_int32 cncl_kind);
+    OMPRTL__kmpc_cancellationpoint,
   };
 
   /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -249,6 +262,16 @@ private:
   ///    deconstructors of firstprivate C++ objects */
   /// } kmp_task_t;
   QualType KmpTaskTQTy;
+  /// \brief Type typedef struct kmp_depend_info {
+  ///    kmp_intptr_t               base_addr;
+  ///    size_t                     len;
+  ///    struct {
+  ///             bool                   in:1;
+  ///             bool                   out:1;
+  ///    } flags;
+  /// } kmp_depend_info_t;
+  QualType KmpDependInfoTy;
+
 
   /// \brief Build type kmp_routine_entry_t (if not built yet).
   void emitKmpRoutineEntryT(QualType KmpInt32Ty);
@@ -341,22 +364,25 @@ public:
   /// kmp_int32 BoundID, struct context_vars*).
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
   /// \param CodeGen Code generation sequence for the \a D directive.
-  virtual llvm::Value *
-  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
-                               const VarDecl *ThreadIDVar,
-                               const RegionCodeGenTy &CodeGen);
+  virtual llvm::Value *emitParallelOutlinedFunction(
+      const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
 
   /// \brief Emits outlined function for the OpenMP task directive \a D. This
   /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
   /// PartID, struct context_vars*).
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
   /// \param CodeGen Code generation sequence for the \a D directive.
   ///
-  virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D,
-                                                const VarDecl *ThreadIDVar,
-                                                const RegionCodeGenTy &CodeGen);
+  virtual llvm::Value *emitTaskOutlinedFunction(
+      const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
 
   /// \brief Cleans up references to the objects in finished function.
   ///
@@ -423,9 +449,12 @@ public:
   /// \brief Emit an implicit/explicit barrier for OpenMP threads.
   /// \param Kind Directive for which this implicit barrier call must be
   /// generated. Must be OMPD_barrier for explicit barrier generation.
+  /// \param CheckForCancel true if check for possible cancellation must be
+  /// performed, false otherwise.
   ///
   virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                               OpenMPDirectiveKind Kind);
+                               OpenMPDirectiveKind Kind,
+                               bool CheckForCancel = true);
 
   /// \brief Check if the specified \a ScheduleKind is static non-chunked.
   /// This kind of worksharing directive is emitted without outer loop.
@@ -580,7 +609,7 @@ public:
   /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
   /// /*part_id*/, captured_struct */*__context*/);
   /// \param SharedsTy A type which contains references the shared variables.
-  /// \param Shareds Context with the list of shared variables from the \a
+  /// \param Shareds Context with the list of shared variables from the \p
   /// TaskFunction.
   /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
   /// otherwise.
@@ -595,21 +624,26 @@ public:
   /// \param FirstprivateInits List of references to auto generated variables
   /// used for initialization of a single array element. Used if firstprivate
   /// variable is of array type.
-  virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
-                            const OMPExecutableDirective &D, bool Tied,
-                            llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
-                            llvm::Value *TaskFunction, QualType SharedsTy,
-                            llvm::Value *Shareds, const Expr *IfCond,
-                            const ArrayRef<const Expr *> PrivateVars,
-                            const ArrayRef<const Expr *> PrivateCopies,
-                            const ArrayRef<const Expr *> FirstprivateVars,
-                            const ArrayRef<const Expr *> FirstprivateCopies,
-                            const ArrayRef<const Expr *> FirstprivateInits);
+  /// \param Dependences List of dependences for the 'task' construct, including
+  /// original expression and dependency type.
+  virtual void emitTaskCall(
+      CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
+      bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+      llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
+      const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+      ArrayRef<const Expr *> PrivateCopies,
+      ArrayRef<const Expr *> FirstprivateVars,
+      ArrayRef<const Expr *> FirstprivateCopies,
+      ArrayRef<const Expr *> FirstprivateInits,
+      ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences);
 
   /// \brief Emit code for the directive that does not require outlining.
   ///
+  /// \param InnermostKind Kind of innermost directive (for simple directives it
+  /// is a directive itself, for combined - its innermost directive).
   /// \param CodeGen Code generation sequence for the \a D directive.
   virtual void emitInlinedDirective(CodeGenFunction &CGF,
+                                    OpenMPDirectiveKind InnermostKind,
                                     const RegionCodeGenTy &CodeGen);
   /// \brief Emit a code for reduction clause. Next code should be emitted for
   /// reduction:
@@ -656,6 +690,14 @@ public:
 
   /// \brief Emit code for 'taskwait' directive.
   virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
+
+  /// \brief Emit code for 'cancellation point' construct.
+  /// \param CancelRegion Region kind for which the cancellation point must be
+  /// emitted.
+  ///
+  virtual void emitCancellationPointCall(CodeGenFunction &CGF,
+                                         SourceLocation Loc,
+                                         OpenMPDirectiveKind CancelRegion);
 };
 
 } // namespace CodeGen
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 9286f03..a12f295 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -240,6 +240,12 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
   case Stmt::OMPTeamsDirectiveClass:
     EmitOMPTeamsDirective(cast<OMPTeamsDirective>(*S));
     break;
+  case Stmt::OMPCancellationPointDirectiveClass:
+    EmitOMPCancellationPointDirective(cast<OMPCancellationPointDirective>(*S));
+    break;
+  case Stmt::OMPCancelDirectiveClass:
+    EmitOMPCancelDirective(cast<OMPCancelDirective>(*S));
+    break;
   }
 }
 
@@ -2149,7 +2155,7 @@ CodeGenFunction::EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K) {
 
   // Emit the CapturedDecl
   CodeGenFunction CGF(CGM, true);
-  CGF.CapturedStmtInfo = new CGCapturedStmtInfo(S, K);
+  CGCapturedStmtRAII CapInfoRAII(CGF, new CGCapturedStmtInfo(S, K));
   llvm::Function *F = CGF.GenerateCapturedStmtFunction(S);
   delete CGF.CapturedStmtInfo;
 
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 5e94d56..b021c12 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -451,11 +451,12 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
 
 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                            const OMPExecutableDirective &S,
+                                           OpenMPDirectiveKind InnermostKind,
                                            const RegionCodeGenTy &CodeGen) {
   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
-      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
+      S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
   if (auto C = S.getSingleClause(OMPC_num_threads)) {
     CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
     auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
@@ -502,10 +503,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                OMPD_unknown);
   };
-  emitCommonOMPParallelDirective(*this, S, CodeGen);
+  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
 }
 
-void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D) {
+void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
+                                      JumpDest LoopExit) {
   RunCleanupsScope BodyScope(*this);
   // Update counters values on current iteration.
   for (auto I : D.updates()) {
@@ -521,7 +523,7 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D) {
 
   // On a continue in the body, jump to the end.
   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
-  BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
+  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
   // Emit loop body.
   EmitStmt(D.getBody());
   // The end (updates/cleanups).
@@ -649,8 +651,10 @@ static void emitAlignedClause(CodeGenFunction &CGF,
         // If no optional parameter is specified, implementation-defined default
         // alignments for SIMD instructions on the target platforms are assumed.
         Alignment =
-            CGF.CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment(
-                E->getType());
+            CGF.getContext()
+                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
+                    E->getType()->getPointeeType()))
+                .getQuantity();
       }
       assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
              "alignment is not power of 2");
@@ -825,10 +829,10 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
       CGF.EmitOMPReductionClauseInit(S, LoopScope);
       HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
       (void)LoopScope.Privatize();
-      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                           S.getCond(), S.getInc(),
+      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
+                           S.getInc(),
                            [&S](CodeGenFunction &CGF) {
-                             CGF.EmitOMPLoopBody(S);
+                             CGF.EmitOMPLoopBody(S, JumpDest());
                              CGF.EmitStopPoint(&S);
                            },
                            [](CodeGenFunction &) {});
@@ -845,7 +849,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
       CGF.EmitBlock(ContBlock, true);
     }
   };
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
@@ -977,19 +981,17 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   }
 
   SourceLocation Loc = S.getLocStart();
-  EmitOMPInnerLoop(
-      S, LoopScope.requiresCleanups(), S.getCond(),
-      S.getInc(),
-      [&S](CodeGenFunction &CGF) {
-        CGF.EmitOMPLoopBody(S);
-        CGF.EmitStopPoint(&S);
-      },
-      [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
-        if (Ordered) {
-          CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
-              CGF, Loc, IVSize, IVSigned);
-        }
-      });
+  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
+                   [&S, LoopExit](CodeGenFunction &CGF) {
+                     CGF.EmitOMPLoopBody(S, LoopExit);
+                     CGF.EmitStopPoint(&S);
+                   },
+                   [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
+                     if (Ordered) {
+                       CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
+                           CGF, Loc, IVSize, IVSigned);
+                     }
+                   });
 
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
@@ -1138,6 +1140,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
         RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                        Ordered, IL.getAddress(), LB.getAddress(),
                        UB.getAddress(), ST.getAddress());
+        auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
         // UB = min(UB, GlobalUB);
         EmitIgnoredExpr(S.getEnsureUpperBound());
         // IV = LB;
@@ -1145,11 +1148,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
         // while (idx <= UB) { BODY; ++idx; }
         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                          S.getInc(),
-                         [&S](CodeGenFunction &CGF) {
-                           CGF.EmitOMPLoopBody(S);
+                         [&S, LoopExit](CodeGenFunction &CGF) {
+                           CGF.EmitOMPLoopBody(S, LoopExit);
                            CGF.EmitStopPoint(&S);
                          },
                          [](CodeGenFunction &) {});
+        EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         RT.emitForStaticFinish(*this, S.getLocStart());
       } else {
@@ -1183,7 +1187,7 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
   };
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen);
 
   // Emit an implicit barrier at the end.
   if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
@@ -1197,7 +1201,7 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
     HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
   };
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
 
   // Emit an implicit barrier at the end.
   if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
@@ -1214,8 +1218,8 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
   return LVal;
 }
 
-static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
-                                        const OMPExecutableDirective &S) {
+OpenMPDirectiveKind
+CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
   auto *CS = dyn_cast<CompoundStmt>(Stmt);
   if (CS && CS->size() > 1) {
@@ -1263,12 +1267,13 @@ static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
             CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
             CS->size());
         unsigned CaseNumber = 0;
-        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
+        for (auto *SubStmt : CS->children()) {
           auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
           CGF.EmitBlock(CaseBB);
           SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
-          CGF.EmitStmt(*C);
+          CGF.EmitStmt(SubStmt);
           CGF.EmitBranch(ExitBB);
+          ++CaseNumber;
         }
         CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
       };
@@ -1311,15 +1316,15 @@ static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
                    CGF.EmitLoadOfScalar(IL, S.getLocStart())));
     };
 
-    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
+    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen);
     // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
     // clause. Otherwise the barrier will be generated by the codegen for the
     // directive.
     if (HasLastprivates && S.getSingleClause(OMPC_nowait)) {
       // Emit implicit barrier to synchronize threads and avoid data races on
       // initialization of firstprivate variables.
-      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
-                                                 OMPD_unknown);
+      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+                                             OMPD_unknown);
     }
     return OMPD_sections;
   }
@@ -1340,11 +1345,10 @@ static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
     (void)SingleScope.Privatize();
 
     CGF.EmitStmt(Stmt);
-    CGF.EnsureInsertPoint();
   };
-  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
-                                              llvm::None, llvm::None,
-                                              llvm::None, llvm::None);
+  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
+                                          llvm::None, llvm::None, llvm::None,
+                                          llvm::None);
   // Emit barrier for firstprivates, lastprivates or reductions only if
   // 'sections' directive has 'nowait' clause. Otherwise the barrier will be
   // generated by the codegen for the directive.
@@ -1352,15 +1356,14 @@ static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
       S.getSingleClause(OMPC_nowait)) {
     // Emit implicit barrier to synchronize threads and avoid data races on
     // initialization of firstprivate variables.
-    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
-                                               OMPD_unknown);
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown);
   }
   return OMPD_single;
 }
 
 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
   LexicalScope Scope(*this, S.getSourceRange());
-  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
+  OpenMPDirectiveKind EmittedAs = EmitSections(S);
   // Emit an implicit barrier at the end.
   if (!S.getSingleClause(OMPC_nowait)) {
     CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
@@ -1373,7 +1376,7 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
     CGF.EnsureInsertPoint();
   };
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
@@ -1453,7 +1456,7 @@ void CodeGenFunction::EmitOMPParallelForDirective(
     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                OMPD_parallel);
   };
-  emitCommonOMPParallelDirective(*this, S, CodeGen);
+  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -1470,7 +1473,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                OMPD_parallel);
   };
-  emitCommonOMPParallelDirective(*this, S, CodeGen);
+  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -1479,12 +1482,12 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
   // directives: 'parallel' with 'sections' directive.
   LexicalScope Scope(*this, S.getSourceRange());
   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
-    (void)emitSections(CGF, S);
+    (void)CGF.EmitSections(S);
     // Emit implicit barrier at the end of parallel region.
     CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
                                                OMPD_parallel);
   };
-  emitCommonOMPParallelDirective(*this, S, CodeGen);
+  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
@@ -1531,6 +1534,15 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
       ++IRef, ++IElemInitRef;
     }
   }
+  // Build list of dependences.
+  llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8>
+      Dependences;
+  for (auto &&I = S.getClausesOfKind(OMPC_depend); I; ++I) {
+    auto *C = cast<OMPDependClause>(*I);
+    for (auto *IRef : C->varlists()) {
+      Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
+    }
+  }
   auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
       CodeGenFunction &CGF) {
     // Set proper addresses for generated private copies.
@@ -1575,8 +1587,8 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
     }
     CGF.EmitStmt(CS->getCapturedStmt());
   };
-  auto OutlinedFn =
-      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
+  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+      S, *I, OMPD_task, CodeGen);
   // Check if we should emit tied or untied task.
   bool Tied = !S.getSingleClause(OMPC_untied);
   // Check if the task is final
@@ -1602,7 +1614,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
   CGM.getOpenMPRuntime().emitTaskCall(
       *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
       CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
-      FirstprivateCopies, FirstprivateInits);
+      FirstprivateCopies, FirstprivateInits, Dependences);
 }
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(
@@ -2041,6 +2053,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
   case OMPC_nowait:
   case OMPC_untied:
   case OMPC_threadprivate:
+  case OMPC_depend:
   case OMPC_mergeable:
     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
   }
@@ -2077,7 +2090,7 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
                       S.getV(), S.getExpr(), S.getUpdateExpr(),
                       S.isXLHSInRHSPart(), S.getLocStart());
   };
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
@@ -2087,3 +2100,23 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
   llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
 }
+
+void CodeGenFunction::EmitOMPCancellationPointDirective(
+    const OMPCancellationPointDirective &S) {
+  CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
+                                                   S.getCancelRegion());
+}
+
+void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
+  llvm_unreachable("CodeGen for 'omp cancel' is not supported yet.");
+}
+
+CodeGenFunction::JumpDest
+CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
+  if (Kind == OMPD_parallel || Kind == OMPD_task)
+    return ReturnBlock;
+  else if (Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections)
+    return BreakContinueStack.empty() ? JumpDest()
+                                      : BreakContinueStack.back().BreakBlock;
+  return JumpDest();
+}
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 17db401..969629f 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -123,8 +123,8 @@ static RValue PerformReturnAdjustment(CodeGenFunction &CGF,
 //           no-op thunk for the regular definition) call va_start/va_end.
 //           There's a bit of per-call overhead for this solution, but it's
 //           better for codesize if the definition is long.
-void CodeGenFunction::GenerateVarArgsThunk(
-                                      llvm::Function *Fn,
+llvm::Function *
+CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
                                       const CGFunctionInfo &FnInfo,
                                       GlobalDecl GD, const ThunkInfo &Thunk) {
   const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
@@ -184,6 +184,8 @@ void CodeGenFunction::GenerateVarArgsThunk(
       }
     }
   }
+
+  return Fn;
 }
 
 void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD,
@@ -378,9 +380,6 @@ void CodeGenFunction::GenerateThunk(llvm::Function *Fn,
   // Set the right linkage.
   CGM.setFunctionLinkage(GD, Fn);
 
-  if (CGM.supportsCOMDAT() && Fn->isWeakForLinker())
-    Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName()));
-
   // Set the right visibility.
   const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
   setThunkVisibility(CGM, MD, Thunk, Fn);
@@ -450,17 +449,19 @@ void CodeGenVTables::emitThunk(GlobalDecl GD, const ThunkInfo &Thunk,
     // expensive/sucky at the moment, so don't generate the thunk unless
     // we have to.
     // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
-    if (!UseAvailableExternallyLinkage) {
-      CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, Thunk);
-      CGM.getCXXABI().setThunkLinkage(ThunkFn, ForVTable, GD,
-                                      !Thunk.Return.isEmpty());
-    }
+    if (UseAvailableExternallyLinkage)
+      return;
+    ThunkFn =
+        CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, Thunk);
   } else {
     // Normal thunk body generation.
     CodeGenFunction(CGM).GenerateThunk(ThunkFn, FnInfo, GD, Thunk);
-    CGM.getCXXABI().setThunkLinkage(ThunkFn, ForVTable, GD,
-                                    !Thunk.Return.isEmpty());
   }
+
+  CGM.getCXXABI().setThunkLinkage(ThunkFn, ForVTable, GD,
+                                  !Thunk.Return.isEmpty());
+  if (CGM.supportsCOMDAT() && ThunkFn->isWeakForLinker())
+    ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
 }
 
 void CodeGenVTables::maybeEmitThunkForVTable(GlobalDecl GD,
@@ -702,7 +703,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) {
                    llvm::Function::InternalLinkage;
         
         return llvm::GlobalVariable::ExternalLinkage;
-        
+
       case TSK_ImplicitInstantiation:
         return !Context.getLangOpts().AppleKext ?
                  llvm::GlobalVariable::LinkOnceODRLinkage :
diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp
index 12189ae..755e8aa 100644
--- a/lib/CodeGen/CodeGenABITypes.cpp
+++ b/lib/CodeGen/CodeGenABITypes.cpp
@@ -20,6 +20,8 @@
 #include "CodeGenModule.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/PreprocessorOptions.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -29,7 +31,10 @@ CodeGenABITypes::CodeGenABITypes(ASTContext &C,
                                  const llvm::DataLayout &TD,
                                  CoverageSourceInfo *CoverageInfo)
   : CGO(new CodeGenOptions),
-    CGM(new CodeGen::CodeGenModule(C, *CGO, M, TD, C.getDiagnostics(),
+    HSO(new HeaderSearchOptions),
+    PPO(new PreprocessorOptions),
+    CGM(new CodeGen::CodeGenModule(C, *HSO, *PPO, *CGO,
+                                   M, TD, C.getDiagnostics(),
                                    CoverageInfo)) {
 }
 
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 54e6b73..0e7b6d8 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -57,6 +57,8 @@ namespace clang {
 
   public:
     BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+                    const HeaderSearchOptions &HeaderSearchOpts,
+                    const PreprocessorOptions &PPOpts,
                     const CodeGenOptions &CodeGenOpts,
                     const TargetOptions &TargetOpts,
                     const LangOptions &LangOpts, bool TimePasses,
@@ -66,7 +68,8 @@ namespace clang {
         : Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts),
           TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(OS),
           Context(nullptr), LLVMIRGeneration("LLVM IR Generation Time"),
-          Gen(CreateLLVMCodeGen(Diags, InFile, CodeGenOpts, C, CoverageInfo)),
+          Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts,
+                                CodeGenOpts, C, CoverageInfo)),
           LinkModule(LinkModule) {
       llvm::TimePassesIsEnabled = TimePasses;
     }
@@ -667,7 +670,8 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
                                     std::unique_ptr<PPCallbacks>(CoverageInfo));
   }
   std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
-      BA, CI.getDiagnostics(), CI.getCodeGenOpts(), CI.getTargetOpts(),
+      BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
+      CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
       CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
       LinkModuleToUse, OS, *VMContext, CoverageInfo));
   BEConsumer = Result.get();
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index bece41e..1fca466 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -930,8 +930,9 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
       EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
                 "missing_return", EmitCheckSourceLocation(FD->getLocation()),
                 None);
-    } else if (CGM.getCodeGenOpts().OptimizationLevel == 0)
-      Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap), {});
+    } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+      EmitTrapCall(llvm::Intrinsic::trap);
+    }
     Builder.CreateUnreachable();
     Builder.ClearInsertionPoint();
   }
@@ -970,8 +971,8 @@ bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
     IgnoreCaseStmts = true;
 
   // Scan subexpressions for verboten labels.
-  for (Stmt::const_child_range I = S->children(); I; ++I)
-    if (ContainsLabel(*I, IgnoreCaseStmts))
+  for (const Stmt *SubStmt : S->children())
+    if (ContainsLabel(SubStmt, IgnoreCaseStmts))
       return true;
 
   return false;
@@ -994,8 +995,8 @@ bool CodeGenFunction::containsBreak(const Stmt *S) {
     return true;
 
   // Scan subexpressions for verboten breaks.
-  for (Stmt::const_child_range I = S->children(); I; ++I)
-    if (containsBreak(*I))
+  for (const Stmt *SubStmt : S->children())
+    if (containsBreak(SubStmt))
       return true;
 
   return false;
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 45475d1..a74c474 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -233,6 +233,20 @@ public:
   };
   CGCapturedStmtInfo *CapturedStmtInfo;
 
+  /// \brief RAII for correct setting/restoring of CapturedStmtInfo.
+  class CGCapturedStmtRAII {
+  private:
+    CodeGenFunction &CGF;
+    CGCapturedStmtInfo *PrevCapturedStmtInfo;
+  public:
+    CGCapturedStmtRAII(CodeGenFunction &CGF,
+                       CGCapturedStmtInfo *NewCapturedStmtInfo)
+        : CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo) {
+      CGF.CapturedStmtInfo = NewCapturedStmtInfo;
+    }
+    ~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; }
+  };
+
   /// BoundsChecking - Emit run-time bounds checks. Higher values mean
   /// potentially higher performance penalties.
   unsigned char BoundsChecking;
@@ -283,12 +297,12 @@ public:
   /// Header for data within LifetimeExtendedCleanupStack.
   struct LifetimeExtendedCleanupHeader {
     /// The size of the following cleanup object.
-    unsigned Size : 29;
+    unsigned Size;
     /// The kind of cleanup to push: a value from the CleanupKind enumeration.
-    unsigned Kind : 3;
+    CleanupKind Kind;
 
-    size_t getSize() const { return size_t(Size); }
-    CleanupKind getKind() const { return static_cast<CleanupKind>(Kind); }
+    size_t getSize() const { return Size; }
+    CleanupKind getKind() const { return Kind; }
   };
 
   /// i32s containing the indexes of the cleanup destinations.
@@ -388,6 +402,8 @@ public:
     LifetimeExtendedCleanupStack.resize(
         LifetimeExtendedCleanupStack.size() + sizeof(Header) + Header.Size);
 
+    static_assert(sizeof(Header) % llvm::AlignOf<T>::Alignment == 0,
+                  "Cleanup will be allocated on misaligned address");
     char *Buffer = &LifetimeExtendedCleanupStack[OldSize];
     new (Buffer) LifetimeExtendedCleanupHeader(Header);
     new (Buffer + sizeof(Header)) T(A...);
@@ -1280,8 +1296,9 @@ public:
   void GenerateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo,
                      GlobalDecl GD, const ThunkInfo &Thunk);
 
-  void GenerateVarArgsThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo,
-                            GlobalDecl GD, const ThunkInfo &Thunk);
+  llvm::Function *GenerateVarArgsThunk(llvm::Function *Fn,
+                                       const CGFunctionInfo &FnInfo,
+                                       GlobalDecl GD, const ThunkInfo &Thunk);
 
   void EmitCtorPrologue(const CXXConstructorDecl *CD, CXXCtorType Type,
                         FunctionArgList &Args);
@@ -2185,6 +2202,9 @@ public:
   void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
   void EmitOMPTargetDirective(const OMPTargetDirective &S);
   void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
+  void
+  EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S);
+  void EmitOMPCancelDirective(const OMPCancelDirective &S);
 
   /// \brief Emit inner loop of the worksharing/simd construct.
   ///
@@ -2202,10 +2222,12 @@ public:
       const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
       const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen);
 
+  JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind);
+
 private:
 
   /// Helpers for the OpenMP loop directives.
-  void EmitOMPLoopBody(const OMPLoopDirective &D);
+  void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
   void EmitOMPSimdInit(const OMPLoopDirective &D);
   void EmitOMPSimdFinal(const OMPLoopDirective &D);
   /// \brief Emit code for the worksharing loop-based directive.
@@ -2217,6 +2239,8 @@ private:
                            OMPPrivateScope &LoopScope, bool Ordered,
                            llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
                            llvm::Value *IL, llvm::Value *Chunk);
+  /// \brief Emit code for sections directive.
+  OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S);
 
 public:
 
@@ -2586,6 +2610,7 @@ public:
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
   llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);
@@ -2852,6 +2877,10 @@ public:
   /// conditional branch to it, for the -ftrapv checks.
   void EmitTrapCheck(llvm::Value *Checked);
 
+  /// \brief Emit a call to trap or debugtrap and attach function attribute
+  /// "trap-func-name" if specified.
+  llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID);
+
   /// \brief Create a check for a function parameter that may potentially be
   /// declared as non-null.
   void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 2dd5414..b905bd2 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -74,16 +74,19 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
   llvm_unreachable("invalid C++ ABI kind");
 }
 
-CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO,
-                             llvm::Module &M, const llvm::DataLayout &TD,
+CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
+                             const PreprocessorOptions &PPO,
+                             const CodeGenOptions &CGO, llvm::Module &M,
+                             const llvm::DataLayout &TD,
                              DiagnosticsEngine &diags,
                              CoverageSourceInfo *CoverageInfo)
-    : Context(C), LangOpts(C.getLangOpts()), CodeGenOpts(CGO), TheModule(M),
-      Diags(diags), TheDataLayout(TD), Target(C.getTargetInfo()),
-      ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(nullptr),
-      TheTargetCodeGenInfo(nullptr), Types(*this), VTables(*this),
-      ObjCRuntime(nullptr), OpenCLRuntime(nullptr), OpenMPRuntime(nullptr),
-      CUDARuntime(nullptr), DebugInfo(nullptr), ARCData(nullptr),
+    : Context(C), LangOpts(C.getLangOpts()), HeaderSearchOpts(HSO),
+      PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags),
+      TheDataLayout(TD), Target(C.getTargetInfo()), ABI(createCXXABI(*this)),
+      VMContext(M.getContext()), TBAA(nullptr), TheTargetCodeGenInfo(nullptr),
+      Types(*this), VTables(*this), ObjCRuntime(nullptr),
+      OpenCLRuntime(nullptr), OpenMPRuntime(nullptr), CUDARuntime(nullptr),
+      DebugInfo(nullptr), ARCData(nullptr),
       NoObjCARCExceptionsMetadata(nullptr), RRData(nullptr), PGOReader(nullptr),
       CFConstantStringClassRef(nullptr), ConstantStringClassRef(nullptr),
       NSConstantStringType(nullptr), NSConcreteGlobalBlock(nullptr),
@@ -145,8 +148,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO,
         llvm::IndexedInstrProfReader::create(CodeGenOpts.InstrProfileInput);
     if (std::error_code EC = ReaderOrErr.getError()) {
       unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
-                                              "Could not read profile: %0");
-      getDiags().Report(DiagID) << EC.message();
+                                              "Could not read profile %0: %1");
+      getDiags().Report(DiagID) << CodeGenOpts.InstrProfileInput
+                                << EC.message();
     } else
       PGOReader = std::move(ReaderOrErr.get());
   }
@@ -1424,7 +1428,7 @@ namespace {
         return false;
       }
       unsigned BuiltinID = FD->getBuiltinID();
-      if (!BuiltinID)
+      if (!BuiltinID || !BI.isLibFunction(BuiltinID))
         return true;
       StringRef BuiltinName = BI.GetName(BuiltinID);
       if (BuiltinName.startswith("__builtin_") &&
@@ -3358,6 +3362,8 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
           Owner->getTopLevelModule()->Name == getLangOpts().CurrentModule)
         break;
     }
+    if (CGDebugInfo *DI = getModuleDebugInfo())
+      DI->EmitImportDecl(*Import);
 
     ImportedModules.insert(Import->getImportedModule());
     break;
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 8e671fa..9a295fe 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -69,6 +69,8 @@ class ValueDecl;
 class VarDecl;
 class LangOptions;
 class CodeGenOptions;
+class HeaderSearchOptions;
+class PreprocessorOptions;
 class DiagnosticsEngine;
 class AnnotateAttr;
 class CXXDestructorDecl;
@@ -278,6 +280,8 @@ public:
 private:
   ASTContext &Context;
   const LangOptions &LangOpts;
+  const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info.
+  const PreprocessorOptions &PreprocessorOpts; // Only used for debug info.
   const CodeGenOptions &CodeGenOpts;
   llvm::Module &TheModule;
   DiagnosticsEngine &Diags;
@@ -488,7 +492,10 @@ private:
 
   std::unique_ptr<CoverageMappingModuleGen> CoverageMapping;
 public:
-  CodeGenModule(ASTContext &C, const CodeGenOptions &CodeGenOpts,
+  CodeGenModule(ASTContext &C,
+                const HeaderSearchOptions &headersearchopts,
+                const PreprocessorOptions &ppopts,
+                const CodeGenOptions &CodeGenOpts,
                 llvm::Module &M, const llvm::DataLayout &TD,
                 DiagnosticsEngine &Diags,
                 CoverageSourceInfo *CoverageInfo = nullptr);
@@ -600,6 +607,10 @@ public:
 
   ASTContext &getContext() const { return Context; }
   const LangOptions &getLangOpts() const { return LangOpts; }
+  const HeaderSearchOptions &getHeaderSearchOpts()
+    const { return HeaderSearchOpts; }
+  const PreprocessorOptions &getPreprocessorOpts()
+    const { return PreprocessorOpts; }
   const CodeGenOptions &getCodeGenOpts() const { return CodeGenOpts; }
   llvm::Module &getModule() const { return TheModule; }
   DiagnosticsEngine &getDiags() const { return Diags; }
@@ -732,6 +743,11 @@ public:
   /// Get a reference to the target of VD.
   llvm::Constant *GetWeakRefReference(const ValueDecl *VD);
 
+  CharUnits
+  computeNonVirtualBaseClassOffset(const CXXRecordDecl *DerivedClass,
+                                   CastExpr::path_const_iterator Start,
+                                   CastExpr::path_const_iterator End);
+
   /// Returns the offset from a derived class to  a class. Returns null if the
   /// offset is 0.
   llvm::Constant *
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index f182a46..8dffefc 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -275,10 +275,9 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
 
   void VisitStmt(const Stmt *S) {
     RecordStmtCount(S);
-    for (Stmt::const_child_range I = S->children(); I; ++I) {
-      if (*I)
-        this->Visit(*I);
-    }
+    for (const Stmt *Child : S->children())
+      if (Child)
+        this->Visit(Child);
   }
 
   void VisitFunctionDecl(const FunctionDecl *D) {
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index e0f926c..a4a8654 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -154,14 +154,16 @@ isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT,
 static bool
 isSafeToConvert(QualType T, CodeGenTypes &CGT,
                 llvm::SmallPtrSet<const RecordDecl*, 16> &AlreadyChecked) {
-  T = T.getCanonicalType();
-  
+  // Strip off atomic type sugar.
+  if (const auto *AT = T->getAs<AtomicType>())
+    T = AT->getValueType();
+
   // If this is a record, check it.
-  if (const RecordType *RT = dyn_cast<RecordType>(T))
+  if (const auto *RT = T->getAs<RecordType>())
     return isSafeToConvert(RT->getDecl(), CGT, AlreadyChecked);
-  
+
   // If this is an array, check the elements, which are embedded inline.
-  if (const ArrayType *AT = dyn_cast<ArrayType>(T))
+  if (const auto *AT = CGT.getContext().getAsArrayType(T))
     return isSafeToConvert(AT->getElementType(), CGT, AlreadyChecked);
 
   // Otherwise, there is no concern about transforming this.  We only care about
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 9ad5d14..eca9159 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -475,7 +475,8 @@ struct CounterCoverageMappingBuilder
   /// files, this adjusts our current region stack and creates the file regions
   /// for the exited file.
   void handleFileExit(SourceLocation NewLoc) {
-    if (SM.isWrittenInSameFile(MostRecentLocation, NewLoc))
+    if (NewLoc.isInvalid() ||
+        SM.isWrittenInSameFile(MostRecentLocation, NewLoc))
       return;
 
     // If NewLoc is not in a file that contains MostRecentLocation, walk up to
@@ -581,10 +582,9 @@ struct CounterCoverageMappingBuilder
   void VisitStmt(const Stmt *S) {
     if (!S->getLocStart().isInvalid())
       extendRegion(S);
-    for (Stmt::const_child_range I = S->children(); I; ++I) {
-      if (*I)
-        this->Visit(*I);
-    }
+    for (const Stmt *Child : S->children())
+      if (Child)
+        this->Visit(Child);
     handleFileExit(getEnd(S));
   }
 
@@ -931,7 +931,8 @@ void CoverageMappingModuleGen::addFunctionMappingRecord(
   if (!FunctionRecordTy) {
     llvm::Type *FunctionRecordTypes[] = {Int8PtrTy, Int32Ty, Int32Ty, Int64Ty};
     FunctionRecordTy =
-        llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes));
+        llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
+                              /*isPacked=*/true);
   }
 
   llvm::Constant *FunctionRecordVals[] = {
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index 3f5ad5d..fa86e52 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -94,7 +94,7 @@ public:
 
   llvm::Constant *EmitNullMemberPointer(const MemberPointerType *MPT) override;
 
-  llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD) override;
+  llvm::Constant *EmitMemberFunctionPointer(const CXXMethodDecl *MD) override;
   llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
                                         CharUnits offset) override;
   llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
@@ -219,7 +219,7 @@ public:
                        bool ReturnAdjustment) override {
     // Allow inlining of thunks by emitting them with available_externally
     // linkage together with vtables when needed.
-    if (ForVTable)
+    if (ForVTable && !Thunk->hasLocalLinkage())
       Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
   }
 
@@ -656,7 +656,8 @@ ItaniumCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
   return llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity());
 }
 
-llvm::Constant *ItaniumCXXABI::EmitMemberPointer(const CXXMethodDecl *MD) {
+llvm::Constant *
+ItaniumCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
   return BuildMemberPointer(MD, CharUnits::Zero());
 }
 
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 679516b..d149df6 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -247,6 +247,50 @@ public:
   getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
                    llvm::GlobalVariable::LinkageTypes Linkage);
 
+  llvm::GlobalVariable *
+  getAddrOfVirtualDisplacementMap(const CXXRecordDecl *SrcRD,
+                                  const CXXRecordDecl *DstRD) {
+    SmallString<256> OutName;
+    llvm::raw_svector_ostream Out(OutName);
+    getMangleContext().mangleCXXVirtualDisplacementMap(SrcRD, DstRD, Out);
+    Out.flush();
+    StringRef MangledName = OutName.str();
+
+    if (auto *VDispMap = CGM.getModule().getNamedGlobal(MangledName))
+      return VDispMap;
+
+    MicrosoftVTableContext &VTContext = CGM.getMicrosoftVTableContext();
+    unsigned NumEntries = 1 + SrcRD->getNumVBases();
+    SmallVector<llvm::Constant *, 4> Map(NumEntries,
+                                         llvm::UndefValue::get(CGM.IntTy));
+    Map[0] = llvm::ConstantInt::get(CGM.IntTy, 0);
+    bool AnyDifferent = false;
+    for (const auto &I : SrcRD->vbases()) {
+      const CXXRecordDecl *VBase = I.getType()->getAsCXXRecordDecl();
+      if (!DstRD->isVirtuallyDerivedFrom(VBase))
+        continue;
+
+      unsigned SrcVBIndex = VTContext.getVBTableIndex(SrcRD, VBase);
+      unsigned DstVBIndex = VTContext.getVBTableIndex(DstRD, VBase);
+      Map[SrcVBIndex] = llvm::ConstantInt::get(CGM.IntTy, DstVBIndex * 4);
+      AnyDifferent |= SrcVBIndex != DstVBIndex;
+    }
+    // This map would be useless, don't use it.
+    if (!AnyDifferent)
+      return nullptr;
+
+    llvm::ArrayType *VDispMapTy = llvm::ArrayType::get(CGM.IntTy, Map.size());
+    llvm::Constant *Init = llvm::ConstantArray::get(VDispMapTy, Map);
+    llvm::GlobalValue::LinkageTypes Linkage =
+        SrcRD->isExternallyVisible() && DstRD->isExternallyVisible()
+            ? llvm::GlobalValue::LinkOnceODRLinkage
+            : llvm::GlobalValue::InternalLinkage;
+    auto *VDispMap = new llvm::GlobalVariable(
+        CGM.getModule(), VDispMapTy, /*Constant=*/true, Linkage,
+        /*Initializer=*/Init, MangledName);
+    return VDispMap;
+  }
+
   void emitVBTableDefinition(const VPtrInfo &VBT, const CXXRecordDecl *RD,
                              llvm::GlobalVariable *GV) const;
 
@@ -502,10 +546,6 @@ private:
                                         CharUnits NonVirtualBaseAdjustment,
                                         unsigned VBTableIndex);
 
-  llvm::Constant *BuildMemberPointer(const CXXRecordDecl *RD,
-                                     const CXXMethodDecl *MD,
-                                     CharUnits NonVirtualBaseAdjustment);
-
   bool MemberPointerConstantIsNull(const MemberPointerType *MPT,
                                    llvm::Constant *MP);
 
@@ -545,7 +585,7 @@ public:
 
   llvm::Constant *EmitMemberDataPointer(const MemberPointerType *MPT,
                                         CharUnits offset) override;
-  llvm::Constant *EmitMemberPointer(const CXXMethodDecl *MD) override;
+  llvm::Constant *EmitMemberFunctionPointer(const CXXMethodDecl *MD) override;
   llvm::Constant *EmitMemberPointer(const APValue &MP, QualType MPT) override;
 
   llvm::Value *EmitMemberPointerComparison(CodeGenFunction &CGF,
@@ -563,6 +603,12 @@ public:
                                llvm::Value *Base, llvm::Value *MemPtr,
                                const MemberPointerType *MPT) override;
 
+  llvm::Value *EmitNonNullMemberPointerConversion(
+      const MemberPointerType *SrcTy, const MemberPointerType *DstTy,
+      CastKind CK, CastExpr::path_const_iterator PathBegin,
+      CastExpr::path_const_iterator PathEnd, llvm::Value *Src,
+      CGBuilderTy &Builder);
+
   llvm::Value *EmitMemberPointerConversion(CodeGenFunction &CGF,
                                            const CastExpr *E,
                                            llvm::Value *Src) override;
@@ -570,6 +616,11 @@ public:
   llvm::Constant *EmitMemberPointerConversion(const CastExpr *E,
                                               llvm::Constant *Src) override;
 
+  llvm::Constant *EmitMemberPointerConversion(
+      const MemberPointerType *SrcTy, const MemberPointerType *DstTy,
+      CastKind CK, CastExpr::path_const_iterator PathBegin,
+      CastExpr::path_const_iterator PathEnd, llvm::Constant *Src);
+
   llvm::Value *
   EmitLoadOfMemberFunctionPointer(CodeGenFunction &CGF, const Expr *E,
                                   llvm::Value *&This, llvm::Value *MemPtr,
@@ -2457,7 +2508,7 @@ MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField,
 
   if (MSInheritanceAttr::hasVBPtrOffsetField(Inheritance)) {
     CharUnits Offs = CharUnits::Zero();
-    if (VBTableIndex && RD->getNumVBases())
+    if (VBTableIndex)
       Offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
     fields.push_back(llvm::ConstantInt::get(CGM.IntTy, Offs.getQuantity()));
   }
@@ -2473,43 +2524,76 @@ llvm::Constant *
 MicrosoftCXXABI::EmitMemberDataPointer(const MemberPointerType *MPT,
                                        CharUnits offset) {
   const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
+  if (RD->getMSInheritanceModel() ==
+      MSInheritanceAttr::Keyword_virtual_inheritance)
+    offset -= getContext().getOffsetOfBaseWithVBPtr(RD);
   llvm::Constant *FirstField =
     llvm::ConstantInt::get(CGM.IntTy, offset.getQuantity());
   return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/false, RD,
                                CharUnits::Zero(), /*VBTableIndex=*/0);
 }
 
-llvm::Constant *MicrosoftCXXABI::EmitMemberPointer(const CXXMethodDecl *MD) {
-  return BuildMemberPointer(MD->getParent(), MD, CharUnits::Zero());
-}
-
 llvm::Constant *MicrosoftCXXABI::EmitMemberPointer(const APValue &MP,
                                                    QualType MPType) {
-  const MemberPointerType *MPT = MPType->castAs<MemberPointerType>();
+  const MemberPointerType *DstTy = MPType->castAs<MemberPointerType>();
   const ValueDecl *MPD = MP.getMemberPointerDecl();
   if (!MPD)
-    return EmitNullMemberPointer(MPT);
+    return EmitNullMemberPointer(DstTy);
 
-  CharUnits ThisAdjustment = getMemberPointerPathAdjustment(MP);
+  ASTContext &Ctx = getContext();
+  ArrayRef<const CXXRecordDecl *> MemberPointerPath = MP.getMemberPointerPath();
 
-  // FIXME PR15713: Support virtual inheritance paths.
+  llvm::Constant *C;
+  if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD)) {
+    C = EmitMemberFunctionPointer(MD);
+  } else {
+    CharUnits FieldOffset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(MPD));
+    C = EmitMemberDataPointer(DstTy, FieldOffset);
+  }
 
-  if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MPD))
-    return BuildMemberPointer(MPT->getMostRecentCXXRecordDecl(), MD,
-                              ThisAdjustment);
+  if (!MemberPointerPath.empty()) {
+    const CXXRecordDecl *SrcRD = cast<CXXRecordDecl>(MPD->getDeclContext());
+    const Type *SrcRecTy = Ctx.getTypeDeclType(SrcRD).getTypePtr();
+    const MemberPointerType *SrcTy =
+        Ctx.getMemberPointerType(DstTy->getPointeeType(), SrcRecTy)
+            ->castAs<MemberPointerType>();
+
+    bool DerivedMember = MP.isMemberPointerToDerivedMember();
+    SmallVector<const CXXBaseSpecifier *, 4> DerivedToBasePath;
+    const CXXRecordDecl *PrevRD = SrcRD;
+    for (const CXXRecordDecl *PathElem : MemberPointerPath) {
+      const CXXRecordDecl *Base = nullptr;
+      const CXXRecordDecl *Derived = nullptr;
+      if (DerivedMember) {
+        Base = PathElem;
+        Derived = PrevRD;
+      } else {
+        Base = PrevRD;
+        Derived = PathElem;
+      }
+      for (const CXXBaseSpecifier &BS : Derived->bases())
+        if (BS.getType()->getAsCXXRecordDecl()->getCanonicalDecl() ==
+            Base->getCanonicalDecl())
+          DerivedToBasePath.push_back(&BS);
+      PrevRD = PathElem;
+    }
+    assert(DerivedToBasePath.size() == MemberPointerPath.size());
 
-  CharUnits FieldOffset =
-    getContext().toCharUnitsFromBits(getContext().getFieldOffset(MPD));
-  return EmitMemberDataPointer(MPT, ThisAdjustment + FieldOffset);
+    CastKind CK = DerivedMember ? CK_DerivedToBaseMemberPointer
+                                : CK_BaseToDerivedMemberPointer;
+    C = EmitMemberPointerConversion(SrcTy, DstTy, CK, DerivedToBasePath.begin(),
+                                    DerivedToBasePath.end(), C);
+  }
+  return C;
 }
 
 llvm::Constant *
-MicrosoftCXXABI::BuildMemberPointer(const CXXRecordDecl *RD,
-                                    const CXXMethodDecl *MD,
-                                    CharUnits NonVirtualBaseAdjustment) {
+MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
   assert(MD->isInstance() && "Member function must not be static!");
+
   MD = MD->getCanonicalDecl();
-  RD = RD->getMostRecentDecl();
+  CharUnits NonVirtualBaseAdjustment = CharUnits::Zero();
+  const CXXRecordDecl *RD = MD->getParent()->getMostRecentDecl();
   CodeGenTypes &Types = CGM.getTypes();
 
   unsigned VBTableIndex = 0;
@@ -2527,20 +2611,24 @@ MicrosoftCXXABI::BuildMemberPointer(const CXXRecordDecl *RD,
       Ty = CGM.PtrDiffTy;
     }
     FirstField = CGM.GetAddrOfFunction(MD, Ty);
-    FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy);
   } else {
     auto &VTableContext = CGM.getMicrosoftVTableContext();
     MicrosoftVTableContext::MethodVFTableLocation ML =
         VTableContext.getMethodVFTableLocation(MD);
-    llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ML);
-    FirstField = llvm::ConstantExpr::getBitCast(Thunk, CGM.VoidPtrTy);
+    FirstField = EmitVirtualMemPtrThunk(MD, ML);
     // Include the vfptr adjustment if the method is in a non-primary vftable.
     NonVirtualBaseAdjustment += ML.VFPtrOffset;
     if (ML.VBase)
       VBTableIndex = VTableContext.getVBTableIndex(RD, ML.VBase) * 4;
   }
 
+  if (VBTableIndex == 0 &&
+      RD->getMSInheritanceModel() ==
+          MSInheritanceAttr::Keyword_virtual_inheritance)
+    NonVirtualBaseAdjustment -= getContext().getOffsetOfBaseWithVBPtr(RD);
+
   // The rest of the fields are common with data member pointers.
+  FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy);
   return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/true, RD,
                                NonVirtualBaseAdjustment, VBTableIndex);
 }
@@ -2798,11 +2886,6 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
   return Builder.CreateBitCast(Addr, PType);
 }
 
-static MSInheritanceAttr::Spelling
-getInheritanceFromMemptr(const MemberPointerType *MPT) {
-  return MPT->getMostRecentCXXRecordDecl()->getMSInheritanceModel();
-}
-
 llvm::Value *
 MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
                                              const CastExpr *E,
@@ -2854,12 +2937,37 @@ MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
   Builder.CreateCondBr(IsNotNull, ConvertBB, ContinueBB);
   CGF.EmitBlock(ConvertBB);
 
+  llvm::Value *Dst = EmitNonNullMemberPointerConversion(
+      SrcTy, DstTy, E->getCastKind(), E->path_begin(), E->path_end(), Src,
+      Builder);
+
+  Builder.CreateBr(ContinueBB);
+
+  // In the continuation, choose between DstNull and Dst.
+  CGF.EmitBlock(ContinueBB);
+  llvm::PHINode *Phi = Builder.CreatePHI(DstNull->getType(), 2, "memptr.converted");
+  Phi->addIncoming(DstNull, OriginalBB);
+  Phi->addIncoming(Dst, ConvertBB);
+  return Phi;
+}
+
+llvm::Value *MicrosoftCXXABI::EmitNonNullMemberPointerConversion(
+    const MemberPointerType *SrcTy, const MemberPointerType *DstTy, CastKind CK,
+    CastExpr::path_const_iterator PathBegin,
+    CastExpr::path_const_iterator PathEnd, llvm::Value *Src,
+    CGBuilderTy &Builder) {
+  const CXXRecordDecl *SrcRD = SrcTy->getMostRecentCXXRecordDecl();
+  const CXXRecordDecl *DstRD = DstTy->getMostRecentCXXRecordDecl();
+  MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel();
+  MSInheritanceAttr::Spelling DstInheritance = DstRD->getMSInheritanceModel();
+  bool IsFunc = SrcTy->isMemberFunctionPointer();
+  bool IsConstant = isa<llvm::Constant>(Src);
+
   // Decompose src.
   llvm::Value *FirstField = Src;
-  llvm::Value *NonVirtualBaseAdjustment = nullptr;
-  llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
-  llvm::Value *VBPtrOffset = nullptr;
-  MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel();
+  llvm::Value *NonVirtualBaseAdjustment = getZeroInt();
+  llvm::Value *VirtualBaseAdjustmentOffset = getZeroInt();
+  llvm::Value *VBPtrOffset = getZeroInt();
   if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
     // We need to extract values.
     unsigned I = 0;
@@ -2872,59 +2980,138 @@ MicrosoftCXXABI::EmitMemberPointerConversion(CodeGenFunction &CGF,
       VirtualBaseAdjustmentOffset = Builder.CreateExtractValue(Src, I++);
   }
 
+  bool IsDerivedToBase = (CK == CK_DerivedToBaseMemberPointer);
+  const MemberPointerType *DerivedTy = IsDerivedToBase ? SrcTy : DstTy;
+  const CXXRecordDecl *DerivedClass = DerivedTy->getMostRecentCXXRecordDecl();
+
   // For data pointers, we adjust the field offset directly.  For functions, we
   // have a separate field.
-  llvm::Constant *Adj = getMemberPointerAdjustment(E);
-  if (Adj) {
-    Adj = llvm::ConstantExpr::getTruncOrBitCast(Adj, CGM.IntTy);
-    llvm::Value *&NVAdjustField = IsFunc ? NonVirtualBaseAdjustment : FirstField;
-    bool isDerivedToBase = (E->getCastKind() == CK_DerivedToBaseMemberPointer);
-    if (!NVAdjustField)  // If this field didn't exist in src, it's zero.
-      NVAdjustField = getZeroInt();
-    if (isDerivedToBase)
-      NVAdjustField = Builder.CreateNSWSub(NVAdjustField, Adj, "adj");
-    else
-      NVAdjustField = Builder.CreateNSWAdd(NVAdjustField, Adj, "adj");
+  llvm::Value *&NVAdjustField = IsFunc ? NonVirtualBaseAdjustment : FirstField;
+
+  // The virtual inheritance model has a quirk: the virtual base table is always
+  // referenced when dereferencing a member pointer even if the member pointer
+  // is non-virtual.  This is accounted for by adjusting the non-virtual offset
+  // to point backwards to the top of the MDC from the first VBase.  Undo this
+  // adjustment to normalize the member pointer.
+  llvm::Value *SrcVBIndexEqZero =
+      Builder.CreateICmpEQ(VirtualBaseAdjustmentOffset, getZeroInt());
+  if (SrcInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) {
+    if (int64_t SrcOffsetToFirstVBase =
+            getContext().getOffsetOfBaseWithVBPtr(SrcRD).getQuantity()) {
+      llvm::Value *UndoSrcAdjustment = Builder.CreateSelect(
+          SrcVBIndexEqZero,
+          llvm::ConstantInt::get(CGM.IntTy, SrcOffsetToFirstVBase),
+          getZeroInt());
+      NVAdjustField = Builder.CreateNSWAdd(NVAdjustField, UndoSrcAdjustment);
+    }
   }
 
-  // FIXME PR15713: Support conversions through virtually derived classes.
+  // A non-zero vbindex implies that we are dealing with a source member in a
+  // floating virtual base in addition to some non-virtual offset.  If the
+  // vbindex is zero, we are dealing with a source that exists in a non-virtual,
+  // fixed, base.  The difference between these two cases is that the vbindex +
+  // nvoffset *always* point to the member regardless of what context they are
+  // evaluated in so long as the vbindex is adjusted.  A member inside a fixed
+  // base requires explicit nv adjustment.
+  llvm::Constant *BaseClassOffset = llvm::ConstantInt::get(
+      CGM.IntTy,
+      CGM.computeNonVirtualBaseClassOffset(DerivedClass, PathBegin, PathEnd)
+          .getQuantity());
+
+  llvm::Value *NVDisp;
+  if (IsDerivedToBase)
+    NVDisp = Builder.CreateNSWSub(NVAdjustField, BaseClassOffset, "adj");
+  else
+    NVDisp = Builder.CreateNSWAdd(NVAdjustField, BaseClassOffset, "adj");
+
+  NVAdjustField = Builder.CreateSelect(SrcVBIndexEqZero, NVDisp, getZeroInt());
+
+  // Update the vbindex to an appropriate value in the destination because
+  // SrcRD's vbtable might not be a strict prefix of the one in DstRD.
+  llvm::Value *DstVBIndexEqZero = SrcVBIndexEqZero;
+  if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance) &&
+      MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance)) {
+    if (llvm::GlobalVariable *VDispMap =
+            getAddrOfVirtualDisplacementMap(SrcRD, DstRD)) {
+      llvm::Value *VBIndex = Builder.CreateExactUDiv(
+          VirtualBaseAdjustmentOffset, llvm::ConstantInt::get(CGM.IntTy, 4));
+      if (IsConstant) {
+        llvm::Constant *Mapping = VDispMap->getInitializer();
+        VirtualBaseAdjustmentOffset =
+            Mapping->getAggregateElement(cast<llvm::Constant>(VBIndex));
+      } else {
+        llvm::Value *Idxs[] = {getZeroInt(), VBIndex};
+        VirtualBaseAdjustmentOffset =
+            Builder.CreateLoad(Builder.CreateInBoundsGEP(VDispMap, Idxs));
+      }
+
+      DstVBIndexEqZero =
+          Builder.CreateICmpEQ(VirtualBaseAdjustmentOffset, getZeroInt());
+    }
+  }
+
+  // Set the VBPtrOffset to zero if the vbindex is zero.  Otherwise, initialize
+  // it to the offset of the vbptr.
+  if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance)) {
+    llvm::Value *DstVBPtrOffset = llvm::ConstantInt::get(
+        CGM.IntTy,
+        getContext().getASTRecordLayout(DstRD).getVBPtrOffset().getQuantity());
+    VBPtrOffset =
+        Builder.CreateSelect(DstVBIndexEqZero, getZeroInt(), DstVBPtrOffset);
+  }
+
+  // Likewise, apply a similar adjustment so that dereferencing the member
+  // pointer correctly accounts for the distance between the start of the first
+  // virtual base and the top of the MDC.
+  if (DstInheritance == MSInheritanceAttr::Keyword_virtual_inheritance) {
+    if (int64_t DstOffsetToFirstVBase =
+            getContext().getOffsetOfBaseWithVBPtr(DstRD).getQuantity()) {
+      llvm::Value *DoDstAdjustment = Builder.CreateSelect(
+          DstVBIndexEqZero,
+          llvm::ConstantInt::get(CGM.IntTy, DstOffsetToFirstVBase),
+          getZeroInt());
+      NVAdjustField = Builder.CreateNSWSub(NVAdjustField, DoDstAdjustment);
+    }
+  }
 
   // Recompose dst from the null struct and the adjusted fields from src.
-  MSInheritanceAttr::Spelling DstInheritance = DstRD->getMSInheritanceModel();
   llvm::Value *Dst;
   if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance)) {
     Dst = FirstField;
   } else {
-    Dst = llvm::UndefValue::get(DstNull->getType());
+    Dst = llvm::UndefValue::get(ConvertMemberPointerType(DstTy));
     unsigned Idx = 0;
     Dst = Builder.CreateInsertValue(Dst, FirstField, Idx++);
     if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance))
-      Dst = Builder.CreateInsertValue(
-        Dst, getValueOrZeroInt(NonVirtualBaseAdjustment), Idx++);
+      Dst = Builder.CreateInsertValue(Dst, NonVirtualBaseAdjustment, Idx++);
     if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance))
-      Dst = Builder.CreateInsertValue(
-        Dst, getValueOrZeroInt(VBPtrOffset), Idx++);
+      Dst = Builder.CreateInsertValue(Dst, VBPtrOffset, Idx++);
     if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance))
-      Dst = Builder.CreateInsertValue(
-        Dst, getValueOrZeroInt(VirtualBaseAdjustmentOffset), Idx++);
+      Dst = Builder.CreateInsertValue(Dst, VirtualBaseAdjustmentOffset, Idx++);
   }
-  Builder.CreateBr(ContinueBB);
-
-  // In the continuation, choose between DstNull and Dst.
-  CGF.EmitBlock(ContinueBB);
-  llvm::PHINode *Phi = Builder.CreatePHI(DstNull->getType(), 2, "memptr.converted");
-  Phi->addIncoming(DstNull, OriginalBB);
-  Phi->addIncoming(Dst, ConvertBB);
-  return Phi;
+  return Dst;
 }
 
 llvm::Constant *
 MicrosoftCXXABI::EmitMemberPointerConversion(const CastExpr *E,
                                              llvm::Constant *Src) {
   const MemberPointerType *SrcTy =
-    E->getSubExpr()->getType()->castAs<MemberPointerType>();
+      E->getSubExpr()->getType()->castAs<MemberPointerType>();
   const MemberPointerType *DstTy = E->getType()->castAs<MemberPointerType>();
 
+  CastKind CK = E->getCastKind();
+
+  return EmitMemberPointerConversion(SrcTy, DstTy, CK, E->path_begin(),
+                                     E->path_end(), Src);
+}
+
+llvm::Constant *MicrosoftCXXABI::EmitMemberPointerConversion(
+    const MemberPointerType *SrcTy, const MemberPointerType *DstTy, CastKind CK,
+    CastExpr::path_const_iterator PathBegin,
+    CastExpr::path_const_iterator PathEnd, llvm::Constant *Src) {
+  assert(CK == CK_DerivedToBaseMemberPointer ||
+         CK == CK_BaseToDerivedMemberPointer ||
+         CK == CK_ReinterpretMemberPointer);
   // If src is null, emit a new null for dst.  We can't return src because dst
   // might have a new representation.
   if (MemberPointerConstantIsNull(SrcTy, Src))
@@ -2933,61 +3120,14 @@ MicrosoftCXXABI::EmitMemberPointerConversion(const CastExpr *E,
   // We don't need to do anything for reinterpret_casts of non-null member
   // pointers.  We should only get here when the two type representations have
   // the same size.
-  if (E->getCastKind() == CK_ReinterpretMemberPointer)
+  if (CK == CK_ReinterpretMemberPointer)
     return Src;
 
-  MSInheritanceAttr::Spelling SrcInheritance = getInheritanceFromMemptr(SrcTy);
-  MSInheritanceAttr::Spelling DstInheritance = getInheritanceFromMemptr(DstTy);
-
-  // Decompose src.
-  llvm::Constant *FirstField = Src;
-  llvm::Constant *NonVirtualBaseAdjustment = nullptr;
-  llvm::Constant *VirtualBaseAdjustmentOffset = nullptr;
-  llvm::Constant *VBPtrOffset = nullptr;
-  bool IsFunc = SrcTy->isMemberFunctionPointer();
-  if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
-    // We need to extract values.
-    unsigned I = 0;
-    FirstField = Src->getAggregateElement(I++);
-    if (MSInheritanceAttr::hasNVOffsetField(IsFunc, SrcInheritance))
-      NonVirtualBaseAdjustment = Src->getAggregateElement(I++);
-    if (MSInheritanceAttr::hasVBPtrOffsetField(SrcInheritance))
-      VBPtrOffset = Src->getAggregateElement(I++);
-    if (MSInheritanceAttr::hasVBTableOffsetField(SrcInheritance))
-      VirtualBaseAdjustmentOffset = Src->getAggregateElement(I++);
-  }
-
-  // For data pointers, we adjust the field offset directly.  For functions, we
-  // have a separate field.
-  llvm::Constant *Adj = getMemberPointerAdjustment(E);
-  if (Adj) {
-    Adj = llvm::ConstantExpr::getTruncOrBitCast(Adj, CGM.IntTy);
-    llvm::Constant *&NVAdjustField =
-      IsFunc ? NonVirtualBaseAdjustment : FirstField;
-    bool IsDerivedToBase = (E->getCastKind() == CK_DerivedToBaseMemberPointer);
-    if (!NVAdjustField)  // If this field didn't exist in src, it's zero.
-      NVAdjustField = getZeroInt();
-    if (IsDerivedToBase)
-      NVAdjustField = llvm::ConstantExpr::getNSWSub(NVAdjustField, Adj);
-    else
-      NVAdjustField = llvm::ConstantExpr::getNSWAdd(NVAdjustField, Adj);
-  }
-
-  // FIXME PR15713: Support conversions through virtually derived classes.
+  CGBuilderTy Builder(CGM.getLLVMContext());
+  auto *Dst = cast<llvm::Constant>(EmitNonNullMemberPointerConversion(
+      SrcTy, DstTy, CK, PathBegin, PathEnd, Src, Builder));
 
-  // Recompose dst from the null struct and the adjusted fields from src.
-  if (MSInheritanceAttr::hasOnlyOneField(IsFunc, DstInheritance))
-    return FirstField;
-
-  llvm::SmallVector<llvm::Constant *, 4> Fields;
-  Fields.push_back(FirstField);
-  if (MSInheritanceAttr::hasNVOffsetField(IsFunc, DstInheritance))
-    Fields.push_back(getConstantOrZeroInt(NonVirtualBaseAdjustment));
-  if (MSInheritanceAttr::hasVBPtrOffsetField(DstInheritance))
-    Fields.push_back(getConstantOrZeroInt(VBPtrOffset));
-  if (MSInheritanceAttr::hasVBTableOffsetField(DstInheritance))
-    Fields.push_back(getConstantOrZeroInt(VirtualBaseAdjustmentOffset));
-  return llvm::ConstantStruct::getAnon(Fields);
+  return Dst;
 }
 
 llvm::Value *MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(
@@ -3592,6 +3732,8 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
       ThunkTy, getLinkageForRTTI(RecordTy), ThunkName.str(), &CGM.getModule());
   ThunkFn->setCallingConv(static_cast<llvm::CallingConv::ID>(
       FnInfo.getEffectiveCallingConvention()));
+  if (ThunkFn->isWeakForLinker())
+    ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
   bool IsCopy = CT == Ctor_CopyingClosure;
 
   // Start codegen.
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index 25e5740..def56a9 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -32,6 +32,8 @@ namespace {
     DiagnosticsEngine &Diags;
     std::unique_ptr<const llvm::DataLayout> TD;
     ASTContext *Ctx;
+    const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info.
+    const PreprocessorOptions &PreprocessorOpts; // Only used for debug info.
     const CodeGenOptions CodeGenOpts;  // Intentionally copied in.
 
     unsigned HandlingTopLevelDecls;
@@ -56,12 +58,15 @@ namespace {
     SmallVector<CXXMethodDecl *, 8> DeferredInlineMethodDefinitions;
 
   public:
-    CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string& ModuleName,
-                      const CodeGenOptions &CGO, llvm::LLVMContext& C,
+    CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string &ModuleName,
+                      const HeaderSearchOptions &HSO,
+                      const PreprocessorOptions &PPO, const CodeGenOptions &CGO,
+                      llvm::LLVMContext &C,
                       CoverageSourceInfo *CoverageInfo = nullptr)
-      : Diags(diags), Ctx(nullptr), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
-        CoverageInfo(CoverageInfo),
-        M(new llvm::Module(ModuleName, C)) {}
+        : Diags(diags), Ctx(nullptr), HeaderSearchOpts(HSO),
+          PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
+          CoverageInfo(CoverageInfo),
+          M(new llvm::Module(ModuleName, C)) {}
 
     ~CodeGeneratorImpl() override {
       // There should normally not be any leftover inline method definitions.
@@ -97,7 +102,10 @@ namespace {
       M->setDataLayout(Ctx->getTargetInfo().getTargetDescription());
       TD.reset(
           new llvm::DataLayout(Ctx->getTargetInfo().getTargetDescription()));
-      Builder.reset(new CodeGen::CodeGenModule(Context, CodeGenOpts, *M, *TD,
+      Builder.reset(new CodeGen::CodeGenModule(Context,
+                                               HeaderSearchOpts,
+                                               PreprocessorOpts,
+                                               CodeGenOpts, *M, *TD,
                                                Diags, CoverageInfo));
 
       for (size_t i = 0, e = CodeGenOpts.DependentLibraries.size(); i < e; ++i)
@@ -236,10 +244,11 @@ namespace {
 
 void CodeGenerator::anchor() { }
 
-CodeGenerator *clang::CreateLLVMCodeGen(DiagnosticsEngine &Diags,
-                                        const std::string& ModuleName,
-                                        const CodeGenOptions &CGO,
-                                        llvm::LLVMContext& C,
-                                        CoverageSourceInfo *CoverageInfo) {
-  return new CodeGeneratorImpl(Diags, ModuleName, CGO, C, CoverageInfo);
+CodeGenerator *clang::CreateLLVMCodeGen(
+    DiagnosticsEngine &Diags, const std::string &ModuleName,
+    const HeaderSearchOptions &HeaderSearchOpts,
+    const PreprocessorOptions &PreprocessorOpts, const CodeGenOptions &CGO,
+    llvm::LLVMContext &C, CoverageSourceInfo *CoverageInfo) {
+  return new CodeGeneratorImpl(Diags, ModuleName, HeaderSearchOpts,
+                               PreprocessorOpts, CGO, C, CoverageInfo);
 }
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index e77c124..e5ba200 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1391,6 +1391,26 @@ bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
 
 
 namespace {
+/// The AVX ABI level for X86 targets.
+enum class X86AVXABILevel {
+  None,
+  AVX,
+  AVX512
+};
+
+/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
+static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
+  switch (AVXLevel) {
+  case X86AVXABILevel::AVX512:
+    return 512;
+  case X86AVXABILevel::AVX:
+    return 256;
+  case X86AVXABILevel::None:
+    return 128;
+  }
+  llvm_unreachable("Unknown AVXLevel");
+}
+
 /// X86_64ABIInfo - The X86_64 ABI information.
 class X86_64ABIInfo : public ABIInfo {
   enum Class {
@@ -1496,13 +1516,14 @@ class X86_64ABIInfo : public ABIInfo {
     return !getTarget().getTriple().isOSDarwin();
   }
 
+  X86AVXABILevel AVXLevel;
   // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
   // 64-bit hardware.
   bool Has64BitPointers;
 
 public:
-  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT) :
-      ABIInfo(CGT),
+  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
+      ABIInfo(CGT), AVXLevel(AVXLevel),
       Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
   }
 
@@ -1527,10 +1548,6 @@ public:
   bool has64BitPointers() const {
     return Has64BitPointers;
   }
-
-  bool hasAVX() const {
-    return getTarget().getABI() == "avx";
-  }
 };
 
 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
@@ -1561,8 +1578,8 @@ public:
 
 class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
-      : TargetCodeGenInfo(new X86_64ABIInfo(CGT)) {}
+  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
+      : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)) {}
 
   const X86_64ABIInfo &getABIInfo() const {
     return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
@@ -1628,16 +1645,12 @@ public:
             ('T' << 24);
     return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
   }
-
-  unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return getABIInfo().hasAVX() ? 32 : 16;
-  }
 };
 
 class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
 public:
-  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
-    : X86_64TargetCodeGenInfo(CGT) {}
+  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
+    : X86_64TargetCodeGenInfo(CGT, AVXLevel) {}
 
   void getDependentLibraryOption(llvm::StringRef Lib,
                                  llvm::SmallString<24> &Opt) const override {
@@ -1703,11 +1716,10 @@ void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
 }
 
 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
-  bool hasAVX() const { return getABIInfo().getTarget().getABI() == "avx"; }
-
 public:
-  WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
-    : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
+  WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
+                             X86AVXABILevel AVXLevel)
+      : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
@@ -1737,10 +1749,6 @@ public:
                                llvm::SmallString<32> &Opt) const override {
     Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
   }
-
-  unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return hasAVX() ? 32 : 16;
-  }
 };
 
 void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D,
@@ -1928,7 +1936,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       // split.
       if (OffsetBase && OffsetBase != 64)
         Hi = Lo;
-    } else if (Size == 128 || (hasAVX() && isNamedArg && Size == 256)) {
+    } else if (Size == 128 ||
+               (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -1940,6 +1949,9 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
       // registers if they are "named", i.e. not part of the "..." of a
       // variadic function.
+      //
+      // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
+      // split into eight eightbyte chunks, one SSE and seven SSEUP.
       Lo = SSE;
       Hi = SSEUp;
     }
@@ -2150,7 +2162,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
   if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
     uint64_t Size = getContext().getTypeSize(VecTy);
-    unsigned LargestVector = hasAVX() ? 256 : 128;
+    unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
     if (Size <= 64 || Size > LargestVector)
       return true;
   }
@@ -2475,13 +2487,16 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
   // of the second element because it might make us access off the end of the
   // struct.
   if (HiStart != 8) {
-    // There are only two sorts of types the ABI generation code can produce for
-    // the low part of a pair that aren't 8 bytes in size: float or i8/i16/i32.
+    // There are usually two sorts of types the ABI generation code can produce
+    // for the low part of a pair that aren't 8 bytes in size: float or
+    // i8/i16/i32.  This can also include pointers when they are 32-bit (X32 and
+    // NaCl).
     // Promote these to a larger type.
     if (Lo->isFloatTy())
       Lo = llvm::Type::getDoubleTy(Lo->getContext());
     else {
-      assert(Lo->isIntegerTy() && "Invalid/unknown lo type");
+      assert((Lo->isIntegerTy() || Lo->isPointerTy())
+             && "Invalid/unknown lo type");
       Lo = llvm::Type::getInt64Ty(Lo->getContext());
     }
   }
@@ -3145,10 +3160,6 @@ public:
 
   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                llvm::Value *Address) const override;
-
-  unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return 16; // Natural alignment for Altivec vectors.
-  }
 };
 
 }
@@ -3388,13 +3399,11 @@ public:
 };
 
 class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
-  bool HasQPX;
 
 public:
   PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
                                PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX)
-    : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)),
-      HasQPX(HasQPX) {}
+      : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {}
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
     // This is recovered from gcc output.
@@ -3403,15 +3412,6 @@ public:
 
   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                llvm::Value *Address) const override;
-
-  unsigned getOpenMPSimdDefaultAlignment(QualType QT) const override {
-    if (HasQPX)
-      if (const PointerType *PT = QT->getAs<PointerType>())
-        if (PT->getPointeeType()->isSpecificBuiltinType(BuiltinType::Double))
-          return 32; // Natural alignment for QPX doubles.
-
-    return 16; // Natural alignment for Altivec and VSX vectors.
-  }
 };
 
 class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo {
@@ -3425,10 +3425,6 @@ public:
 
   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                llvm::Value *Address) const override;
-
-  unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return 16; // Natural alignment for Altivec vectors.
-  }
 };
 
 }
@@ -7194,13 +7190,21 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
   }
 
   case llvm::Triple::x86_64: {
+    StringRef ABI = getTarget().getABI();
+    X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 :
+                               ABI == "avx" ? X86AVXABILevel::AVX :
+                               X86AVXABILevel::None);
+
     switch (Triple.getOS()) {
     case llvm::Triple::Win32:
-      return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types));
+      return *(TheTargetCodeGenInfo =
+                   new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
     case llvm::Triple::PS4:
-      return *(TheTargetCodeGenInfo = new PS4TargetCodeGenInfo(Types));
+      return *(TheTargetCodeGenInfo =
+                   new PS4TargetCodeGenInfo(Types, AVXLevel));
     default:
-      return *(TheTargetCodeGenInfo = new X86_64TargetCodeGenInfo(Types));
+      return *(TheTargetCodeGenInfo =
+                   new X86_64TargetCodeGenInfo(Types, AVXLevel));
     }
   }
   case llvm::Triple::hexagon:
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index bf63265..95275d5 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -218,13 +218,6 @@ public:
   virtual void getDetectMismatchOption(llvm::StringRef Name,
                                        llvm::StringRef Value,
                                        llvm::SmallString<32> &Opt) const {}
-
-  /// Gets the target-specific default alignment used when an 'aligned' clause
-  /// is used with a 'simd' OpenMP directive without specifying a specific
-  /// alignment.
-  virtual unsigned getOpenMPSimdDefaultAlignment(QualType Type) const {
-    return 0;
-  }
 };
 }
 
diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt
index 412840b..fa0430e 100644
--- a/lib/Driver/CMakeLists.txt
+++ b/lib/Driver/CMakeLists.txt
@@ -10,6 +10,7 @@ add_clang_library(clangDriver
   Driver.cpp
   DriverOptions.cpp
   Job.cpp
+  MinGWToolChain.cpp
   Multilib.cpp
   MSVCToolChain.cpp
   Phases.cpp
diff --git a/lib/Driver/Compilation.cpp b/lib/Driver/Compilation.cpp
index 2bcbd5c..101d1fc 100644
--- a/lib/Driver/Compilation.cpp
+++ b/lib/Driver/Compilation.cpp
@@ -192,18 +192,14 @@ static bool InputsOk(const Command &C,
   return !ActionFailed(&C.getSource(), FailingCommands);
 }
 
-void Compilation::ExecuteJob(const Job &J,
-                             FailingCommandList &FailingCommands) const {
-  if (const Command *C = dyn_cast<Command>(&J)) {
-    if (!InputsOk(*C, FailingCommands))
-      return;
+void Compilation::ExecuteJobs(const JobList &Jobs,
+                              FailingCommandList &FailingCommands) const {
+  for (const auto &Job : Jobs) {
+    if (!InputsOk(Job, FailingCommands))
+      continue;
     const Command *FailingCommand = nullptr;
-    if (int Res = ExecuteCommand(*C, FailingCommand))
+    if (int Res = ExecuteCommand(Job, FailingCommand))
       FailingCommands.push_back(std::make_pair(Res, FailingCommand));
-  } else {
-    const JobList *Jobs = cast<JobList>(&J);
-    for (const auto &Job : *Jobs)
-      ExecuteJob(Job, FailingCommands);
   }
 }
 
diff --git a/lib/Driver/CrossWindowsToolChain.cpp b/lib/Driver/CrossWindowsToolChain.cpp
index 82456e7..ffb1469 100644
--- a/lib/Driver/CrossWindowsToolChain.cpp
+++ b/lib/Driver/CrossWindowsToolChain.cpp
@@ -108,10 +108,9 @@ AddCXXStdlibLibArgs(const llvm::opt::ArgList &DriverArgs,
 }
 
 Tool *CrossWindowsToolChain::buildLinker() const {
-  return new tools::CrossWindows::Link(*this);
+  return new tools::CrossWindows::Linker(*this);
 }
 
 Tool *CrossWindowsToolChain::buildAssembler() const {
-  return new tools::CrossWindows::Assemble(*this);
+  return new tools::CrossWindows::Assembler(*this);
 }
-
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index 0f3ebef..b9dc35d 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -58,7 +58,7 @@ Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
       CCCUsePCH(true), SuppressMissingInputWarning(false) {
 
   Name = llvm::sys::path::filename(ClangExecutable);
-  Dir  = llvm::sys::path::parent_path(ClangExecutable);
+  Dir = llvm::sys::path::parent_path(ClangExecutable);
 
   // Compute the path to the resource directory.
   StringRef ClangResourceDir(CLANG_RESOURCE_DIR);
@@ -81,23 +81,23 @@ Driver::~Driver() {
 
 void Driver::ParseDriverMode(ArrayRef<const char *> Args) {
   const std::string OptName =
-    getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
+      getOpts().getOption(options::OPT_driver_mode).getPrefixedName();
 
-  for (size_t I = 0, E = Args.size(); I != E; ++I) {
+  for (const char *ArgPtr : Args) {
     // Ingore nullptrs, they are response file's EOL markers
-    if (Args[I] == nullptr)
+    if (ArgPtr == nullptr)
       continue;
-    const StringRef Arg = Args[I];
+    const StringRef Arg = ArgPtr;
     if (!Arg.startswith(OptName))
       continue;
 
     const StringRef Value = Arg.drop_front(OptName.size());
     const unsigned M = llvm::StringSwitch<unsigned>(Value)
-        .Case("gcc", GCCMode)
-        .Case("g++", GXXMode)
-        .Case("cpp", CPPMode)
-        .Case("cl",  CLMode)
-        .Default(~0U);
+                           .Case("gcc", GCCMode)
+                           .Case("g++", GXXMode)
+                           .Case("cpp", CPPMode)
+                           .Case("cl", CLMode)
+                           .Default(~0U);
 
     if (M != ~0U)
       Mode = static_cast<DriverMode>(M);
@@ -106,42 +106,39 @@ void Driver::ParseDriverMode(ArrayRef<const char *> Args) {
   }
 }
 
-InputArgList *Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
+InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
   llvm::PrettyStackTraceString CrashInfo("Command line argument parsing");
 
   unsigned IncludedFlagsBitmask;
   unsigned ExcludedFlagsBitmask;
   std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
-    getIncludeExcludeOptionFlagMasks();
+      getIncludeExcludeOptionFlagMasks();
 
   unsigned MissingArgIndex, MissingArgCount;
-  InputArgList *Args = getOpts().ParseArgs(ArgStrings.begin(), ArgStrings.end(),
-                                           MissingArgIndex, MissingArgCount,
-                                           IncludedFlagsBitmask,
-                                           ExcludedFlagsBitmask);
+  InputArgList Args =
+      getOpts().ParseArgs(ArgStrings, MissingArgIndex, MissingArgCount,
+                          IncludedFlagsBitmask, ExcludedFlagsBitmask);
 
   // Check for missing argument error.
   if (MissingArgCount)
     Diag(clang::diag::err_drv_missing_argument)
-      << Args->getArgString(MissingArgIndex) << MissingArgCount;
+        << Args.getArgString(MissingArgIndex) << MissingArgCount;
 
   // Check for unsupported options.
-  for (const Arg *A : *Args) {
+  for (const Arg *A : Args) {
     if (A->getOption().hasFlag(options::Unsupported)) {
-      Diag(clang::diag::err_drv_unsupported_opt) << A->getAsString(*Args);
+      Diag(clang::diag::err_drv_unsupported_opt) << A->getAsString(Args);
       continue;
     }
 
     // Warn about -mcpu= without an argument.
-    if (A->getOption().matches(options::OPT_mcpu_EQ) &&
-        A->containsValue("")) {
-      Diag(clang::diag::warn_drv_empty_joined_argument) <<
-        A->getAsString(*Args);
+    if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) {
+      Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(Args);
     }
   }
 
-  for (const Arg *A : Args->filtered(options::OPT_UNKNOWN))
-    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(*Args);
+  for (const Arg *A : Args.filtered(options::OPT_UNKNOWN))
+    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args);
 
   return Args;
 }
@@ -149,14 +146,13 @@ InputArgList *Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) {
 // Determine which compilation mode we are in. We look for options which
 // affect the phase, starting with the earliest phases, and record which
 // option we used to determine the final phase.
-phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, Arg **FinalPhaseArg)
-const {
+phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
+                                 Arg **FinalPhaseArg) const {
   Arg *PhaseArg = nullptr;
   phases::ID FinalPhase;
 
   // -{E,EP,P,M,MM} only run the preprocessor.
-  if (CCCIsCPP() ||
-      (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
+  if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
       (PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
       (PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
       (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {
@@ -192,7 +188,7 @@ const {
   return FinalPhase;
 }
 
-static Arg* MakeInputArg(DerivedArgList &Args, OptTable *Opts,
+static Arg *MakeInputArg(DerivedArgList &Args, OptTable *Opts,
                          StringRef Value) {
   Arg *A = new Arg(Opts->getOption(options::OPT_INPUT), Value,
                    Args.getBaseArgs().MakeIndex(Value), Value.data());
@@ -219,10 +215,9 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
       DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_Xlinker__no_demangle));
 
       // Add the remaining values as Xlinker arguments.
-      for (unsigned i = 0, e = A->getNumValues(); i != e; ++i)
-        if (StringRef(A->getValue(i)) != "--no-demangle")
-          DAL->AddSeparateArg(A, Opts->getOption(options::OPT_Xlinker),
-                              A->getValue(i));
+      for (const StringRef Val : A->getValues())
+        if (Val != "--no-demangle")
+          DAL->AddSeparateArg(A, Opts->getOption(options::OPT_Xlinker), Val);
 
       continue;
     }
@@ -250,15 +245,13 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
 
       // Rewrite unless -nostdlib is present.
       if (!HasNostdlib && Value == "stdc++") {
-        DAL->AddFlagArg(A, Opts->getOption(
-                              options::OPT_Z_reserved_lib_stdcxx));
+        DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_stdcxx));
         continue;
       }
 
       // Rewrite unconditionally.
       if (Value == "cc_kext") {
-        DAL->AddFlagArg(A, Opts->getOption(
-                              options::OPT_Z_reserved_lib_cckext));
+        DAL->AddFlagArg(A, Opts->getOption(options::OPT_Z_reserved_lib_cckext));
         continue;
       }
     }
@@ -266,16 +259,16 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
     // Pick up inputs via the -- option.
     if (A->getOption().matches(options::OPT__DASH_DASH)) {
       A->claim();
-      for (unsigned i = 0, e = A->getNumValues(); i != e; ++i)
-        DAL->append(MakeInputArg(*DAL, Opts, A->getValue(i)));
+      for (const StringRef Val : A->getValues())
+        DAL->append(MakeInputArg(*DAL, Opts, Val));
       continue;
     }
 
     DAL->append(A);
   }
 
-  // Add a default value of -mlinker-version=, if one was given and the user
-  // didn't specify one.
+// Add a default value of -mlinker-version=, if one was given and the user
+// didn't specify one.
 #if defined(HOST_LINK_VERSION)
   if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
       strlen(HOST_LINK_VERSION) > 0) {
@@ -297,8 +290,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   if (char *env = ::getenv("COMPILER_PATH")) {
     StringRef CompilerPath = env;
     while (!CompilerPath.empty()) {
-      std::pair<StringRef, StringRef> Split
-        = CompilerPath.split(llvm::sys::EnvPathSeparator);
+      std::pair<StringRef, StringRef> Split =
+          CompilerPath.split(llvm::sys::EnvPathSeparator);
       PrefixDirs.push_back(Split.first);
       CompilerPath = Split.second;
     }
@@ -311,15 +304,15 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   // FIXME: What are we going to do with -V and -b?
 
   // FIXME: This stuff needs to go into the Compilation, not the driver.
-  bool CCCPrintActions;
+  bool CCCPrintPhases;
 
-  InputArgList *Args = ParseArgStrings(ArgList.slice(1));
+  InputArgList Args = ParseArgStrings(ArgList.slice(1));
 
   // -no-canonical-prefixes is used very early in main.
-  Args->ClaimAllArgs(options::OPT_no_canonical_prefixes);
+  Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
 
   // Ignore -pipe.
-  Args->ClaimAllArgs(options::OPT_pipe);
+  Args.ClaimAllArgs(options::OPT_pipe);
 
   // Extract -ccc args.
   //
@@ -327,12 +320,12 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   // should be outside in the client; the parts that aren't should have proper
   // options, either by introducing new ones or by overloading gcc ones like -V
   // or -b.
-  CCCPrintActions = Args->hasArg(options::OPT_ccc_print_phases);
-  CCCPrintBindings = Args->hasArg(options::OPT_ccc_print_bindings);
-  if (const Arg *A = Args->getLastArg(options::OPT_ccc_gcc_name))
+  CCCPrintPhases = Args.hasArg(options::OPT_ccc_print_phases);
+  CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings);
+  if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name))
     CCCGenericGCCName = A->getValue();
-  CCCUsePCH = Args->hasFlag(options::OPT_ccc_pch_is_pch,
-                            options::OPT_ccc_pch_is_pth);
+  CCCUsePCH =
+      Args.hasFlag(options::OPT_ccc_pch_is_pch, options::OPT_ccc_pch_is_pth);
   // FIXME: DefaultTargetTriple is used by the target-prefixed calls to as/ld
   // and getToolChain is const.
   if (IsCLMode()) {
@@ -342,39 +335,42 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
     T.setEnvironment(llvm::Triple::MSVC);
     DefaultTargetTriple = T.str();
   }
-  if (const Arg *A = Args->getLastArg(options::OPT_target))
+  if (const Arg *A = Args.getLastArg(options::OPT_target))
     DefaultTargetTriple = A->getValue();
-  if (const Arg *A = Args->getLastArg(options::OPT_ccc_install_dir))
+  if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir))
     Dir = InstalledDir = A->getValue();
-  for (const Arg *A : Args->filtered(options::OPT_B)) {
+  for (const Arg *A : Args.filtered(options::OPT_B)) {
     A->claim();
     PrefixDirs.push_back(A->getValue(0));
   }
-  if (const Arg *A = Args->getLastArg(options::OPT__sysroot_EQ))
+  if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ))
     SysRoot = A->getValue();
-  if (const Arg *A = Args->getLastArg(options::OPT__dyld_prefix_EQ))
+  if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ))
     DyldPrefix = A->getValue();
-  if (Args->hasArg(options::OPT_nostdlib))
+  if (Args.hasArg(options::OPT_nostdlib))
     UseStdLib = false;
 
-  if (const Arg *A = Args->getLastArg(options::OPT_resource_dir))
+  if (const Arg *A = Args.getLastArg(options::OPT_resource_dir))
     ResourceDir = A->getValue();
 
-  if (const Arg *A = Args->getLastArg(options::OPT_save_temps_EQ)) {
+  if (const Arg *A = Args.getLastArg(options::OPT_save_temps_EQ)) {
     SaveTemps = llvm::StringSwitch<SaveTempsMode>(A->getValue())
                     .Case("cwd", SaveTempsCwd)
                     .Case("obj", SaveTempsObj)
                     .Default(SaveTempsCwd);
   }
 
+  std::unique_ptr<llvm::opt::InputArgList> UArgs =
+      llvm::make_unique<InputArgList>(std::move(Args));
+
   // Perform the default argument translations.
-  DerivedArgList *TranslatedArgs = TranslateInputArgs(*Args);
+  DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs);
 
   // Owned by the host.
-  const ToolChain &TC = getToolChain(*Args);
+  const ToolChain &TC = getToolChain(*UArgs);
 
   // The compilation takes ownership of Args.
-  Compilation *C = new Compilation(*this, TC, Args, TranslatedArgs);
+  Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
 
   if (!HandleImmediateArgs(*C))
     return C;
@@ -386,13 +382,13 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   // Construct the list of abstract actions to perform for this compilation. On
   // MachO targets this uses the driver-driver and universal actions.
   if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(),
-                          Inputs, C->getActions());
+    BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
+                          C->getActions());
   else
     BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
                  C->getActions());
 
-  if (CCCPrintActions) {
+  if (CCCPrintPhases) {
     PrintActions(*C);
     return C;
   }
@@ -419,8 +415,8 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
   PrintVersion(C, llvm::errs());
 
   Diag(clang::diag::note_drv_command_failed_diag_msg)
-    << "PLEASE submit a bug report to " BUG_REPORT_URL " and include the "
-    "crash backtrace, preprocessed source, and associated run script.";
+      << "PLEASE submit a bug report to " BUG_REPORT_URL " and include the "
+         "crash backtrace, preprocessed source, and associated run script.";
 
   // Suppress driver output and emit preprocessor output to temp file.
   Mode = CPPMode;
@@ -445,12 +441,12 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
 
     // Ignore input from stdin or any inputs that cannot be preprocessed.
     // Check type first as not all linker inputs have a value.
-   if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
+    if (types::getPreprocessedType(it->first) == types::TY_INVALID) {
       IgnoreInput = true;
     } else if (!strcmp(it->second->getValue(), "-")) {
       Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating preprocessed source(s) - ignoring input from stdin"
-        ".";
+          << "Error generating preprocessed source(s) - "
+             "ignoring input from stdin.";
       IgnoreInput = true;
     }
 
@@ -464,7 +460,8 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
 
   if (Inputs.empty()) {
     Diag(clang::diag::note_drv_command_failed_diag_msg)
-      << "Error generating preprocessed source(s) - no preprocessable inputs.";
+        << "Error generating preprocessed source(s) - "
+           "no preprocessable inputs.";
     return;
   }
 
@@ -479,8 +476,8 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
   }
   if (ArchNames.size() > 1) {
     Diag(clang::diag::note_drv_command_failed_diag_msg)
-      << "Error generating preprocessed source(s) - cannot generate "
-      "preprocessed source with multiple -arch options.";
+        << "Error generating preprocessed source(s) - cannot generate "
+           "preprocessed source with multiple -arch options.";
     return;
   }
 
@@ -497,13 +494,13 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
   // If there were errors building the compilation, quit now.
   if (Trap.hasErrorOccurred()) {
     Diag(clang::diag::note_drv_command_failed_diag_msg)
-      << "Error generating preprocessed source(s).";
+        << "Error generating preprocessed source(s).";
     return;
   }
 
   // Generate preprocessed output.
   SmallVector<std::pair<int, const Command *>, 4> FailingCommands;
-  C.ExecuteJob(C.getJobs(), FailingCommands);
+  C.ExecuteJobs(C.getJobs(), FailingCommands);
 
   // If any of the preprocessing commands failed, clean up and exit.
   if (!FailingCommands.empty()) {
@@ -511,14 +508,14 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
       C.CleanupFileList(C.getTempFiles(), true);
 
     Diag(clang::diag::note_drv_command_failed_diag_msg)
-      << "Error generating preprocessed source(s).";
+        << "Error generating preprocessed source(s).";
     return;
   }
 
   const ArgStringList &TempFiles = C.getTempFiles();
   if (TempFiles.empty()) {
     Diag(clang::diag::note_drv_command_failed_diag_msg)
-      << "Error generating preprocessed source(s).";
+        << "Error generating preprocessed source(s).";
     return;
   }
 
@@ -563,31 +560,22 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
       << "\n\n********************";
 }
 
-void Driver::setUpResponseFiles(Compilation &C, Job &J) {
-  if (JobList *Jobs = dyn_cast<JobList>(&J)) {
-    for (auto &Job : *Jobs)
-      setUpResponseFiles(C, Job);
-    return;
-  }
-
-  Command *CurCommand = dyn_cast<Command>(&J);
-  if (!CurCommand)
-    return;
-
+void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) {
   // Since argumentsFitWithinSystemLimits() may underestimate system's capacity
   // if the tool does not support response files, there is a chance/ that things
   // will just work without a response file, so we silently just skip it.
-  if (CurCommand->getCreator().getResponseFilesSupport() == Tool::RF_None ||
-      llvm::sys::argumentsFitWithinSystemLimits(CurCommand->getArguments()))
+  if (Cmd.getCreator().getResponseFilesSupport() == Tool::RF_None ||
+      llvm::sys::argumentsFitWithinSystemLimits(Cmd.getArguments()))
     return;
 
   std::string TmpName = GetTemporaryPath("response", "txt");
-  CurCommand->setResponseFile(C.addTempFile(C.getArgs().MakeArgString(
-      TmpName.c_str())));
+  Cmd.setResponseFile(
+      C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())));
 }
 
-int Driver::ExecuteCompilation(Compilation &C,
-    SmallVectorImpl< std::pair<int, const Command *> > &FailingCommands) {
+int Driver::ExecuteCompilation(
+    Compilation &C,
+    SmallVectorImpl<std::pair<int, const Command *>> &FailingCommands) {
   // Just print if -### was present.
   if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) {
     C.getJobs().Print(llvm::errs(), "\n", true);
@@ -599,9 +587,10 @@ int Driver::ExecuteCompilation(Compilation &C,
     return 1;
 
   // Set up response file names for each command, if necessary
-  setUpResponseFiles(C, C.getJobs());
+  for (auto &Job : C.getJobs())
+    setUpResponseFiles(C, Job);
 
-  C.ExecuteJob(C.getJobs(), FailingCommands);
+  C.ExecuteJobs(C.getJobs(), FailingCommands);
 
   // Remove temp files.
   C.CleanupFileList(C.getTempFiles());
@@ -612,10 +601,9 @@ int Driver::ExecuteCompilation(Compilation &C,
 
   // Otherwise, remove result files and print extra information about abnormal
   // failures.
-  for (SmallVectorImpl< std::pair<int, const Command *> >::iterator it =
-         FailingCommands.begin(), ie = FailingCommands.end(); it != ie; ++it) {
-    int Res = it->first;
-    const Command *FailingCommand = it->second;
+  for (const auto &CmdPair : FailingCommands) {
+    int Res = CmdPair.first;
+    const Command *FailingCommand = CmdPair.second;
 
     // Remove result files if we're not saving temps.
     if (!isSaveTempsEnabled()) {
@@ -640,10 +628,10 @@ int Driver::ExecuteCompilation(Compilation &C,
       // FIXME: See FIXME above regarding result code interpretation.
       if (Res < 0)
         Diag(clang::diag::err_drv_command_signalled)
-          << FailingTool.getShortName();
+            << FailingTool.getShortName();
       else
-        Diag(clang::diag::err_drv_command_failed)
-          << FailingTool.getShortName() << Res;
+        Diag(clang::diag::err_drv_command_failed) << FailingTool.getShortName()
+                                                  << Res;
     }
   }
   return 0;
@@ -653,7 +641,7 @@ void Driver::PrintHelp(bool ShowHidden) const {
   unsigned IncludedFlagsBitmask;
   unsigned ExcludedFlagsBitmask;
   std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
-    getIncludeExcludeOptionFlagMasks();
+      getIncludeExcludeOptionFlagMasks();
 
   ExcludedFlagsBitmask |= options::NoDriverOption;
   if (!ShowHidden)
@@ -684,8 +672,8 @@ void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
 /// option.
 static void PrintDiagnosticCategories(raw_ostream &OS) {
   // Skip the empty category.
-  for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories();
-       i != max; ++i)
+  for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories(); i != max;
+       ++i)
     OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
 }
 
@@ -739,25 +727,26 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
 
   if (C.getArgs().hasArg(options::OPT_print_search_dirs)) {
     llvm::outs() << "programs: =";
-    for (ToolChain::path_list::const_iterator it = TC.getProgramPaths().begin(),
-           ie = TC.getProgramPaths().end(); it != ie; ++it) {
-      if (it != TC.getProgramPaths().begin())
+    bool separator = false;
+    for (const std::string &Path : TC.getProgramPaths()) {
+      if (separator)
         llvm::outs() << ':';
-      llvm::outs() << *it;
+      llvm::outs() << Path;
+      separator = true;
     }
     llvm::outs() << "\n";
     llvm::outs() << "libraries: =" << ResourceDir;
 
     StringRef sysroot = C.getSysRoot();
 
-    for (ToolChain::path_list::const_iterator it = TC.getFilePaths().begin(),
-           ie = TC.getFilePaths().end(); it != ie; ++it) {
+    for (const std::string &Path : TC.getFilePaths()) {
+      // Always print a separator. ResourceDir was the first item shown.
       llvm::outs() << ':';
-      const char *path = it->c_str();
-      if (path[0] == '=')
-        llvm::outs() << sysroot << path + 1;
+      // Interpretation of leading '=' is needed only for NetBSD.
+      if (Path[0] == '=')
+        llvm::outs() << sysroot << Path.substr(1);
       else
-        llvm::outs() << path;
+        llvm::outs() << Path;
     }
     llvm::outs() << "\n";
     return false;
@@ -781,23 +770,17 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
   }
 
   if (C.getArgs().hasArg(options::OPT_print_multi_lib)) {
-    const MultilibSet &Multilibs = TC.getMultilibs();
-
-    for (MultilibSet::const_iterator I = Multilibs.begin(), E = Multilibs.end();
-         I != E; ++I) {
-      llvm::outs() << *I << "\n";
-    }
+    for (const Multilib &Multilib : TC.getMultilibs())
+      llvm::outs() << Multilib << "\n";
     return false;
   }
 
   if (C.getArgs().hasArg(options::OPT_print_multi_directory)) {
-    const MultilibSet &Multilibs = TC.getMultilibs();
-    for (MultilibSet::const_iterator I = Multilibs.begin(), E = Multilibs.end();
-         I != E; ++I) {
-      if (I->gccSuffix().empty())
+    for (const Multilib &Multilib : TC.getMultilibs()) {
+      if (Multilib.gccSuffix().empty())
         llvm::outs() << ".\n";
       else {
-        StringRef Suffix(I->gccSuffix());
+        StringRef Suffix(Multilib.gccSuffix());
         assert(Suffix.front() == '/');
         llvm::outs() << Suffix.substr(1) << "\n";
       }
@@ -819,7 +802,7 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
 // and latest-occuring action. Traversal is in pre-order, visiting the
 // inputs to each action before printing the action itself.
 static unsigned PrintActions1(const Compilation &C, Action *A,
-                              std::map<Action*, unsigned> &Ids) {
+                              std::map<Action *, unsigned> &Ids) {
   if (Ids.count(A)) // A was already visited.
     return Ids[A];
 
@@ -830,15 +813,13 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
   if (InputAction *IA = dyn_cast<InputAction>(A)) {
     os << "\"" << IA->getInputArg().getValue() << "\"";
   } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) {
-    os << '"' << BIA->getArchName() << '"'
-       << ", {" << PrintActions1(C, *BIA->begin(), Ids) << "}";
+    os << '"' << BIA->getArchName() << '"' << ", {"
+       << PrintActions1(C, *BIA->begin(), Ids) << "}";
   } else {
-    os << "{";
-    for (Action::iterator it = A->begin(), ie = A->end(); it != ie;) {
-      os << PrintActions1(C, *it, Ids);
-      ++it;
-      if (it != ie)
-        os << ", ";
+    const char *Prefix = "{";
+    for (Action *PreRequisite : *A) {
+      os << Prefix << PrintActions1(C, PreRequisite, Ids);
+      Prefix = ", ";
     }
     os << "}";
   }
@@ -854,17 +835,15 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
 // Print the action graphs in a compilation C.
 // For example "clang -c file1.c file2.c" is composed of two subgraphs.
 void Driver::PrintActions(const Compilation &C) const {
-  std::map<Action*, unsigned> Ids;
-  for (ActionList::const_iterator it = C.getActions().begin(),
-         ie = C.getActions().end(); it != ie; ++it)
-    PrintActions1(C, *it, Ids);
+  std::map<Action *, unsigned> Ids;
+  for (Action *A : C.getActions())
+    PrintActions1(C, A, Ids);
 }
 
 /// \brief Check whether the given input tree contains any compilation or
 /// assembly actions.
 static bool ContainsCompileOrAssembleAction(const Action *A) {
-  if (isa<CompileJobAction>(A) ||
-      isa<BackendJobAction>(A) ||
+  if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A) ||
       isa<AssembleJobAction>(A))
     return true;
 
@@ -875,8 +854,7 @@ static bool ContainsCompileOrAssembleAction(const Action *A) {
   return false;
 }
 
-void Driver::BuildUniversalActions(const ToolChain &TC,
-                                   DerivedArgList &Args,
+void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
                                    const InputList &BAInputs,
                                    ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
@@ -889,10 +867,9 @@ void Driver::BuildUniversalActions(const ToolChain &TC,
       // Validate the option here; we don't save the type here because its
       // particular spelling may participate in other driver choices.
       llvm::Triple::ArchType Arch =
-        tools::darwin::getArchTypeForMachOArchName(A->getValue());
+          tools::darwin::getArchTypeForMachOArchName(A->getValue());
       if (Arch == llvm::Triple::UnknownArch) {
-        Diag(clang::diag::err_drv_invalid_arch_name)
-          << A->getAsString(Args);
+        Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args);
         continue;
       }
 
@@ -923,7 +900,7 @@ void Driver::BuildUniversalActions(const ToolChain &TC,
 
     if (Archs.size() > 1 && !types::canLipoType(Act->getType()))
       Diag(clang::diag::err_drv_invalid_output_with_multiple_archs)
-        << types::getTypeName(Act->getType());
+          << types::getTypeName(Act->getType());
 
     ActionList Inputs;
     for (unsigned i = 0, e = Archs.size(); i != e; ++i) {
@@ -1013,10 +990,11 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
                                          options::OPT__SLASH_TP)) {
     InputTypeArg = TCTP;
     InputType = TCTP->getOption().matches(options::OPT__SLASH_TC)
-        ? types::TY_C : types::TY_CXX;
+                    ? types::TY_C
+                    : types::TY_CXX;
 
-    arg_iterator it = Args.filtered_begin(options::OPT__SLASH_TC,
-                                          options::OPT__SLASH_TP);
+    arg_iterator it =
+        Args.filtered_begin(options::OPT__SLASH_TC, options::OPT__SLASH_TP);
     const arg_iterator ie = Args.filtered_end();
     Arg *Previous = *it++;
     bool ShowNote = false;
@@ -1078,7 +1056,7 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
 
             if (Ty != OldTy)
               Diag(clang::diag::warn_drv_treating_input_as_cxx)
-                << getTypeName(OldTy) << getTypeName(Ty);
+                  << getTypeName(OldTy) << getTypeName(Ty);
           }
         }
 
@@ -1180,7 +1158,7 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
         !llvm::sys::path::is_separator(V.back())) {
       // Check whether /Fo tries to name an output file for multiple inputs.
       Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
-        << A->getSpelling() << V;
+          << A->getSpelling() << V;
       Args.eraseArg(options::OPT__SLASH_Fo);
     }
   }
@@ -1192,7 +1170,7 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
         !llvm::sys::path::is_separator(V.back())) {
       // Check whether /Fa tries to name an asm file for multiple inputs.
       Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources)
-        << A->getSpelling() << V;
+          << A->getSpelling() << V;
       Args.eraseArg(options::OPT__SLASH_Fa);
     }
   }
@@ -1232,30 +1210,27 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
       // by a command-line argument with a corresponding Arg.
       if (CCCIsCPP())
         Diag(clang::diag::warn_drv_input_file_unused_by_cpp)
-          << InputArg->getAsString(Args)
-          << getPhaseName(InitialPhase);
+            << InputArg->getAsString(Args) << getPhaseName(InitialPhase);
       // Special case '-E' warning on a previously preprocessed file to make
       // more sense.
       else if (InitialPhase == phases::Compile &&
                FinalPhase == phases::Preprocess &&
                getPreprocessedType(InputType) == types::TY_INVALID)
         Diag(clang::diag::warn_drv_preprocessed_input_file_unused)
-          << InputArg->getAsString(Args)
-          << !!FinalPhaseArg
-          << (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
+            << InputArg->getAsString(Args) << !!FinalPhaseArg
+            << (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
       else
         Diag(clang::diag::warn_drv_input_file_unused)
-          << InputArg->getAsString(Args)
-          << getPhaseName(InitialPhase)
-          << !!FinalPhaseArg
-          << (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
+            << InputArg->getAsString(Args) << getPhaseName(InitialPhase)
+            << !!FinalPhaseArg
+            << (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : "");
       continue;
     }
 
     // Build the pipeline for this file.
     std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType));
-    for (SmallVectorImpl<phases::ID>::iterator
-           i = PL.begin(), e = PL.end(); i != e; ++i) {
+    for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();
+         i != e; ++i) {
       phases::ID Phase = *i;
 
       // We are done if this step is past what the user requested.
@@ -1308,7 +1283,8 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args,
   llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
   // Build the appropriate action.
   switch (Phase) {
-  case phases::Link: llvm_unreachable("link action invalid here.");
+  case phases::Link:
+    llvm_unreachable("link action invalid here.");
   case phases::Preprocess: {
     types::ID OutputTy;
     // -{M, MM} alter the output type.
@@ -1364,12 +1340,12 @@ Driver::ConstructPhaseAction(const ToolChain &TC, const ArgList &Args,
   case phases::Backend: {
     if (IsUsingLTO(Args)) {
       types::ID Output =
-        Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
+          Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
       return llvm::make_unique<BackendJobAction>(std::move(Input), Output);
     }
     if (Args.hasArg(options::OPT_emit_llvm)) {
       types::ID Output =
-        Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
+          Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
       return llvm::make_unique<BackendJobAction>(std::move(Input), Output);
     }
     return llvm::make_unique<BackendJobAction>(std::move(Input),
@@ -1430,11 +1406,10 @@ void Driver::BuildJobs(Compilation &C) const {
 
     InputInfo II;
     BuildJobsForAction(C, A, &C.getDefaultToolChain(),
-                       /*BoundArch*/nullptr,
+                       /*BoundArch*/ nullptr,
                        /*AtTopLevel*/ true,
                        /*MultipleArchs*/ ArchNames.size() > 1,
-                       /*LinkingOutput*/ LinkingOutput,
-                       II);
+                       /*LinkingOutput*/ LinkingOutput, II);
   }
 
   // If the user passed -Qunused-arguments or there were errors, don't warn
@@ -1444,10 +1419,10 @@ void Driver::BuildJobs(Compilation &C) const {
     return;
 
   // Claim -### here.
-  (void) C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
+  (void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH);
 
   // Claim --driver-mode, it was handled earlier.
-  (void) C.getArgs().hasArg(options::OPT_driver_mode);
+  (void)C.getArgs().hasArg(options::OPT_driver_mode);
 
   for (Arg *A : C.getArgs()) {
     // FIXME: It would be nice to be able to send the argument to the
@@ -1475,7 +1450,7 @@ void Driver::BuildJobs(Compilation &C) const {
       }
 
       Diag(clang::diag::warn_drv_unused_argument)
-        << A->getAsString(C.getArgs());
+          << A->getAsString(C.getArgs());
     }
   }
 }
@@ -1489,13 +1464,12 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
   // bottom up, so what we are actually looking for is an assembler job with a
   // compiler input.
 
-  if (TC->useIntegratedAs() &&
-      !SaveTemps &&
+  if (TC->useIntegratedAs() && !SaveTemps &&
       !C.getArgs().hasArg(options::OPT_via_file_asm) &&
       !C.getArgs().hasArg(options::OPT__SLASH_FA) &&
       !C.getArgs().hasArg(options::OPT__SLASH_Fa) &&
-      isa<AssembleJobAction>(JA) &&
-      Inputs->size() == 1 && isa<BackendJobAction>(*Inputs->begin())) {
+      isa<AssembleJobAction>(JA) && Inputs->size() == 1 &&
+      isa<BackendJobAction>(*Inputs->begin())) {
     // A BackendJob is always preceded by a CompileJob, and without
     // -save-temps they will always get combined together, so instead of
     // checking the backend tool, check if the tool for the CompileJob
@@ -1536,8 +1510,7 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
   // (irrelevant since we don't support combine yet).
   if (Inputs->size() == 1 && isa<PreprocessJobAction>(*Inputs->begin()) &&
       !C.getArgs().hasArg(options::OPT_no_integrated_cpp) &&
-      !C.getArgs().hasArg(options::OPT_traditional_cpp) &&
-      !SaveTemps &&
+      !C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps &&
       !C.getArgs().hasArg(options::OPT_rewrite_objc) &&
       ToolForJob->hasIntegratedCPP())
     Inputs = &(*Inputs)[0]->getInputs();
@@ -1545,12 +1518,9 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps,
   return ToolForJob;
 }
 
-void Driver::BuildJobsForAction(Compilation &C,
-                                const Action *A,
-                                const ToolChain *TC,
-                                const char *BoundArch,
-                                bool AtTopLevel,
-                                bool MultipleArchs,
+void Driver::BuildJobsForAction(Compilation &C, const Action *A,
+                                const ToolChain *TC, const char *BoundArch,
+                                bool AtTopLevel, bool MultipleArchs,
                                 const char *LinkingOutput,
                                 InputInfo &Result) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
@@ -1578,8 +1548,8 @@ void Driver::BuildJobsForAction(Compilation &C,
     else
       TC = &C.getDefaultToolChain();
 
-    BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(),
-                       AtTopLevel, MultipleArchs, LinkingOutput, Result);
+    BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), AtTopLevel,
+                       MultipleArchs, LinkingOutput, Result);
     return;
   }
 
@@ -1647,7 +1617,8 @@ const char *Driver::getDefaultImageName() const {
 /// does not provide a filename, then use BaseName, and use the extension
 /// suitable for FileType.
 static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
-                                        StringRef BaseName, types::ID FileType) {
+                                        StringRef BaseName,
+                                        types::ID FileType) {
   SmallString<128> Filename = ArgValue;
 
   if (ArgValue.empty()) {
@@ -1674,16 +1645,13 @@ static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,
   return Args.MakeArgString(Filename.c_str());
 }
 
-const char *Driver::GetNamedOutputPath(Compilation &C,
-                                       const JobAction &JA,
+const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
                                        const char *BaseInput,
-                                       const char *BoundArch,
-                                       bool AtTopLevel,
+                                       const char *BoundArch, bool AtTopLevel,
                                        bool MultipleArchs) const {
   llvm::PrettyStackTraceString CrashInfo("Computing output path");
   // Output to a user requested destination?
-  if (AtTopLevel && !isa<DsymutilJobAction>(JA) &&
-      !isa<VerifyJobAction>(JA)) {
+  if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) {
     if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
       return C.addResultFile(FinalOutput->getValue(), &JA);
   }
@@ -1695,8 +1663,9 @@ const char *Driver::GetNamedOutputPath(Compilation &C,
     StringRef NameArg;
     if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))
       NameArg = A->getValue();
-    return C.addResultFile(MakeCLOutputFilename(C.getArgs(), NameArg, BaseName,
-                                                types::TY_PP_C), &JA);
+    return C.addResultFile(
+        MakeCLOutputFilename(C.getArgs(), NameArg, BaseName, types::TY_PP_C),
+        &JA);
   }
 
   // Default to writing to stdout?
@@ -1711,19 +1680,19 @@ const char *Driver::GetNamedOutputPath(Compilation &C,
     // Use /Fa and the input filename to determine the asm file name.
     StringRef BaseName = llvm::sys::path::filename(BaseInput);
     StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa);
-    return C.addResultFile(MakeCLOutputFilename(C.getArgs(), FaValue, BaseName,
-                                                JA.getType()), &JA);
+    return C.addResultFile(
+        MakeCLOutputFilename(C.getArgs(), FaValue, BaseName, JA.getType()),
+        &JA);
   }
 
   // Output to a temporary file?
   if ((!AtTopLevel && !isSaveTempsEnabled() &&
-        !C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
+       !C.getArgs().hasArg(options::OPT__SLASH_Fo)) ||
       CCGenDiagnostics) {
     StringRef Name = llvm::sys::path::filename(BaseInput);
     std::pair<StringRef, StringRef> Split = Name.split('.');
-    std::string TmpName =
-      GetTemporaryPath(Split.first,
-          types::getTypeTempSuffix(JA.getType(), IsCLMode()));
+    std::string TmpName = GetTemporaryPath(
+        Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
     return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
   }
 
@@ -1742,22 +1711,27 @@ const char *Driver::GetNamedOutputPath(Compilation &C,
   if (JA.getType() == types::TY_Object &&
       C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {
     // The /Fo or /o flag decides the object filename.
-    StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_Fo,
-                                           options::OPT__SLASH_o)->getValue();
-    NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName,
-                                       types::TY_Object);
+    StringRef Val =
+        C.getArgs()
+            .getLastArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)
+            ->getValue();
+    NamedOutput =
+        MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object);
   } else if (JA.getType() == types::TY_Image &&
-             C.getArgs().hasArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)) {
+             C.getArgs().hasArg(options::OPT__SLASH_Fe,
+                                options::OPT__SLASH_o)) {
     // The /Fe or /o flag names the linked file.
-    StringRef Val = C.getArgs().getLastArg(options::OPT__SLASH_Fe,
-                                           options::OPT__SLASH_o)->getValue();
-    NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName,
-                                       types::TY_Image);
+    StringRef Val =
+        C.getArgs()
+            .getLastArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)
+            ->getValue();
+    NamedOutput =
+        MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Image);
   } else if (JA.getType() == types::TY_Image) {
     if (IsCLMode()) {
       // clang-cl uses BaseName for the executable name.
-      NamedOutput = MakeCLOutputFilename(C.getArgs(), "", BaseName,
-                                         types::TY_Image);
+      NamedOutput =
+          MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image);
     } else if (MultipleArchs && BoundArch) {
       SmallString<128> Output(getDefaultImageName());
       Output += "-";
@@ -1811,9 +1785,8 @@ const char *Driver::GetNamedOutputPath(Compilation &C,
     if (SameFile) {
       StringRef Name = llvm::sys::path::filename(BaseInput);
       std::pair<StringRef, StringRef> Split = Name.split('.');
-      std::string TmpName =
-        GetTemporaryPath(Split.first,
-            types::getTypeTempSuffix(JA.getType(), IsCLMode()));
+      std::string TmpName = GetTemporaryPath(
+          Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
       return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()));
     }
   }
@@ -1834,14 +1807,10 @@ const char *Driver::GetNamedOutputPath(Compilation &C,
 std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const {
   // Respect a limited subset of the '-Bprefix' functionality in GCC by
   // attempting to use this prefix when looking for file paths.
-  for (Driver::prefix_list::const_iterator it = PrefixDirs.begin(),
-       ie = PrefixDirs.end(); it != ie; ++it) {
-    std::string Dir(*it);
+  for (const std::string &Dir : PrefixDirs) {
     if (Dir.empty())
       continue;
-    if (Dir[0] == '=')
-      Dir = SysRoot + Dir.substr(1);
-    SmallString<128> P(Dir);
+    SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
     llvm::sys::path::append(P, Name);
     if (llvm::sys::fs::exists(Twine(P)))
       return P.str();
@@ -1852,15 +1821,10 @@ std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const {
   if (llvm::sys::fs::exists(Twine(P)))
     return P.str();
 
-  const ToolChain::path_list &List = TC.getFilePaths();
-  for (ToolChain::path_list::const_iterator
-         it = List.begin(), ie = List.end(); it != ie; ++it) {
-    std::string Dir(*it);
+  for (const std::string &Dir : TC.getFilePaths()) {
     if (Dir.empty())
       continue;
-    if (Dir[0] == '=')
-      Dir = SysRoot + Dir.substr(1);
-    SmallString<128> P(Dir);
+    SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir);
     llvm::sys::path::append(P, Name);
     if (llvm::sys::fs::exists(Twine(P)))
       return P.str();
@@ -1869,9 +1833,9 @@ std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const {
   return Name;
 }
 
-void
-Driver::generatePrefixedToolNames(const char *Tool, const ToolChain &TC,
-                                  SmallVectorImpl<std::string> &Names) const {
+void Driver::generatePrefixedToolNames(
+    const char *Tool, const ToolChain &TC,
+    SmallVectorImpl<std::string> &Names) const {
   // FIXME: Needs a better variable than DefaultTargetTriple
   Names.emplace_back(DefaultTargetTriple + "-" + Tool);
   Names.emplace_back(Tool);
@@ -1923,8 +1887,8 @@ std::string Driver::GetProgramPath(const char *Name,
   return Name;
 }
 
-std::string Driver::GetTemporaryPath(StringRef Prefix, const char *Suffix)
-  const {
+std::string Driver::GetTemporaryPath(StringRef Prefix,
+                                     const char *Suffix) const {
   SmallString<128> Path;
   std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);
   if (EC) {
@@ -1998,7 +1962,7 @@ static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
       if (Target.getEnvironment() == llvm::Triple::GNUX32)
         Target.setEnvironment(llvm::Triple::GNU);
     } else if (A->getOption().matches(options::OPT_mx32) &&
-             Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
+               Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
       AT = llvm::Triple::x86_64;
       Target.setEnvironment(llvm::Triple::GNUX32);
     } else if (A->getOption().matches(options::OPT_m32)) {
@@ -2006,7 +1970,7 @@ static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple,
       if (Target.getEnvironment() == llvm::Triple::GNUX32)
         Target.setEnvironment(llvm::Triple::GNU);
     } else if (A->getOption().matches(options::OPT_m16) &&
-             Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
+               Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
       AT = llvm::Triple::x86;
       Target.setEnvironment(llvm::Triple::CODE16);
     }
@@ -2075,12 +2039,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
           TC = new toolchains::Generic_GCC(*this, Target, Args);
         break;
       case llvm::Triple::GNU:
-        // FIXME: We need a MinGW toolchain.  Use the default Generic_GCC
-        // toolchain for now as the default case would below otherwise.
-        if (Target.isOSBinFormatELF())
-          TC = new toolchains::Generic_ELF(*this, Target, Args);
-        else
-          TC = new toolchains::Generic_GCC(*this, Target, Args);
+        TC = new toolchains::MinGW(*this, Target, Args);
         break;
       case llvm::Triple::Itanium:
         TC = new toolchains::CrossWindowsToolChain(*this, Target, Args);
@@ -2116,8 +2075,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
 
 bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
   // Say "no" if there is not exactly one input of a type clang understands.
-  if (JA.size() != 1 ||
-      !types::isAcceptedByClang((*JA.begin())->getType()))
+  if (JA.size() != 1 || !types::isAcceptedByClang((*JA.begin())->getType()))
     return false;
 
   // And say "no" if this is not a kind of action clang understands.
@@ -2143,21 +2101,21 @@ bool Driver::GetReleaseVersion(const char *Str, unsigned &Major,
     return false;
 
   char *End;
-  Major = (unsigned) strtol(Str, &End, 10);
+  Major = (unsigned)strtol(Str, &End, 10);
   if (*Str != '\0' && *End == '\0')
     return true;
   if (*End != '.')
     return false;
 
-  Str = End+1;
-  Minor = (unsigned) strtol(Str, &End, 10);
+  Str = End + 1;
+  Minor = (unsigned)strtol(Str, &End, 10);
   if (*Str != '\0' && *End == '\0')
     return true;
   if (*End != '.')
     return false;
 
-  Str = End+1;
-  Micro = (unsigned) strtol(Str, &End, 10);
+  Str = End + 1;
+  Micro = (unsigned)strtol(Str, &End, 10);
   if (*Str != '\0' && *End == '\0')
     return true;
   if (Str == End)
diff --git a/lib/Driver/Job.cpp b/lib/Driver/Job.cpp
index 6d18a41..ac18e1e 100644
--- a/lib/Driver/Job.cpp
+++ b/lib/Driver/Job.cpp
@@ -25,14 +25,10 @@ using llvm::raw_ostream;
 using llvm::StringRef;
 using llvm::ArrayRef;
 
-Job::~Job() {}
-
-Command::Command(const Action &_Source, const Tool &_Creator,
-                 const char *_Executable,
-                 const ArgStringList &_Arguments)
-    : Job(CommandClass), Source(_Source), Creator(_Creator),
-      Executable(_Executable), Arguments(_Arguments),
-      ResponseFile(nullptr) {}
+Command::Command(const Action &Source, const Tool &Creator,
+                 const char *Executable, const ArgStringList &Arguments)
+    : Source(Source), Creator(Creator), Executable(Executable),
+      Arguments(Arguments), ResponseFile(nullptr) {}
 
 static int skipArgs(const char *Flag, bool HaveCrashVFS) {
   // These flags are all of the form -Flag <Arg> and are treated as two
@@ -295,8 +291,6 @@ int FallbackCommand::Execute(const StringRef **Redirects, std::string *ErrMsg,
   return SecondaryStatus;
 }
 
-JobList::JobList() : Job(JobListClass) {}
-
 void JobList::Print(raw_ostream &OS, const char *Terminator, bool Quote,
                     CrashReportInfo *CrashInfo) const {
   for (const auto &Job : *this)
diff --git a/lib/Driver/MSVCToolChain.cpp b/lib/Driver/MSVCToolChain.cpp
index 7216121..f20f58a 100644
--- a/lib/Driver/MSVCToolChain.cpp
+++ b/lib/Driver/MSVCToolChain.cpp
@@ -53,12 +53,12 @@ MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple& Triple,
 }
 
 Tool *MSVCToolChain::buildLinker() const {
-  return new tools::visualstudio::Link(*this);
+  return new tools::visualstudio::Linker(*this);
 }
 
 Tool *MSVCToolChain::buildAssembler() const {
   if (getTriple().isOSBinFormatMachO())
-    return new tools::darwin::Assemble(*this);
+    return new tools::darwin::Assembler(*this);
   getDriver().Diag(clang::diag::err_no_external_assembler);
   return nullptr;
 }
diff --git a/lib/Driver/MinGWToolChain.cpp b/lib/Driver/MinGWToolChain.cpp
new file mode 100644
index 0000000..606508d
--- /dev/null
+++ b/lib/Driver/MinGWToolChain.cpp
@@ -0,0 +1,143 @@
+//===--- MinGWToolChain.cpp - MinGWToolChain Implementation
+//-----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ToolChains.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/Options.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+using namespace clang::diag;
+using namespace clang::driver;
+using namespace clang::driver::toolchains;
+using namespace clang;
+using namespace llvm::opt;
+
+MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : ToolChain(D, Triple, Args) {
+  getProgramPaths().push_back(getDriver().getInstalledDir());
+
+  if (getDriver().SysRoot.size())
+    Base = getDriver().SysRoot;
+  else if (llvm::ErrorOr<std::string> GPPName =
+               llvm::sys::findProgramByName("gcc"))
+    Base = llvm::sys::path::parent_path(
+        llvm::sys::path::parent_path(GPPName.get()));
+  else
+    Base = llvm::sys::path::parent_path(getDriver().getInstalledDir());
+  Base += llvm::sys::path::get_separator();
+  llvm::SmallString<1024> LibDir(Base);
+  llvm::sys::path::append(LibDir, "lib", "gcc");
+  LibDir += llvm::sys::path::get_separator();
+
+  // First look for mingw-w64.
+  Arch = getTriple().getArchName();
+  Arch += "-w64-mingw32";
+  std::error_code EC;
+  llvm::sys::fs::directory_iterator MingW64Entry(LibDir + Arch, EC);
+  if (!EC) {
+    GccLibDir = MingW64Entry->path();
+  } else {
+    // If mingw-w64 not found, try looking for mingw.org.
+    Arch = "mingw32";
+    llvm::sys::fs::directory_iterator MingwOrgEntry(LibDir + Arch, EC);
+    if (!EC)
+      GccLibDir = MingwOrgEntry->path();
+  }
+  Arch += llvm::sys::path::get_separator();
+  // GccLibDir must precede Base/lib so that the
+  // correct crtbegin.o ,cetend.o would be found.
+  getFilePaths().push_back(GccLibDir);
+  getFilePaths().push_back(Base + "lib");
+  getFilePaths().push_back(Base + Arch + "lib");
+}
+
+bool MinGW::IsIntegratedAssemblerDefault() const { return true; }
+
+Tool *MinGW::getTool(Action::ActionClass AC) const {
+  switch (AC) {
+  case Action::PreprocessJobClass:
+    if (!Preprocessor)
+      Preprocessor.reset(new tools::gcc::Preprocessor(*this));
+    return Preprocessor.get();
+  case Action::CompileJobClass:
+    if (!Compiler)
+      Compiler.reset(new tools::gcc::Compiler(*this));
+    return Compiler.get();
+  default:
+    return ToolChain::getTool(AC);
+  }
+}
+
+Tool *MinGW::buildAssembler() const {
+  return new tools::MinGW::Assembler(*this);
+}
+
+Tool *MinGW::buildLinker() const { return new tools::MinGW::Linker(*this); }
+
+bool MinGW::IsUnwindTablesDefault() const {
+  return getArch() == llvm::Triple::x86_64;
+}
+
+bool MinGW::isPICDefault() const { return getArch() == llvm::Triple::x86_64; }
+
+bool MinGW::isPIEDefault() const { return false; }
+
+bool MinGW::isPICDefaultForced() const {
+  return getArch() == llvm::Triple::x86_64;
+}
+
+bool MinGW::UseSEHExceptions() const {
+  return getArch() == llvm::Triple::x86_64;
+}
+
+void MinGW::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
+                                      ArgStringList &CC1Args) const {
+  if (DriverArgs.hasArg(options::OPT_nostdinc))
+    return;
+
+  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
+    SmallString<1024> P(getDriver().ResourceDir);
+    llvm::sys::path::append(P, "include");
+    addSystemInclude(DriverArgs, CC1Args, P.str());
+  }
+
+  if (DriverArgs.hasArg(options::OPT_nostdlibinc))
+    return;
+
+  llvm::SmallString<1024> IncludeDir(GccLibDir);
+  llvm::sys::path::append(IncludeDir, "include");
+  addSystemInclude(DriverArgs, CC1Args, IncludeDir.c_str());
+  IncludeDir += "-fixed";
+  addSystemInclude(DriverArgs, CC1Args, IncludeDir.c_str());
+  addSystemInclude(DriverArgs, CC1Args, Base + Arch + "include");
+  addSystemInclude(DriverArgs, CC1Args, Base + "include");
+}
+
+void MinGW::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
+                                         ArgStringList &CC1Args) const {
+  if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
+      DriverArgs.hasArg(options::OPT_nostdincxx))
+    return;
+
+  llvm::SmallString<1024> IncludeDir;
+  for (bool MingW64 : {true, false}) {
+    if (MingW64)
+      IncludeDir = Base + Arch;
+    else
+      IncludeDir = GccLibDir;
+    llvm::sys::path::append(IncludeDir, "include", "c++");
+    addSystemInclude(DriverArgs, CC1Args, IncludeDir.str());
+    IncludeDir += llvm::sys::path::get_separator();
+    addSystemInclude(DriverArgs, CC1Args, IncludeDir.str() + Arch);
+    addSystemInclude(DriverArgs, CC1Args, IncludeDir.str() + "backward");
+  }
+}
diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp
index 14c3702..3043481 100644
--- a/lib/Driver/SanitizerArgs.cpp
+++ b/lib/Driver/SanitizerArgs.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Driver/SanitizerArgs.h"
+#include "Tools.h"
 #include "clang/Basic/Sanitizers.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
@@ -26,6 +27,7 @@ using namespace llvm::opt;
 
 enum : SanitizerMask {
   NeedsUbsanRt = Undefined | Integer | CFI,
+  NeedsUbsanCxxRt = Vptr | CFI,
   NotAllowedWithTrap = Vptr,
   RequiresPIE = Memory | DataFlow,
   NeedsUnwindTables = Address | Thread | Memory | DataFlow,
@@ -194,7 +196,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
   SanitizerMask DiagnosedKinds = 0;  // All Kinds we have diagnosed up to now.
                                      // Used to deduplicate diagnostics.
   SanitizerMask Kinds = 0;
-  SanitizerMask Supported = setGroupBits(TC.getSupportedSanitizers());
+  const SanitizerMask Supported = setGroupBits(TC.getSupportedSanitizers());
   ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
 
   const Driver &D = TC.getDriver();
@@ -282,6 +284,21 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
         << lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto";
   }
 
+  // Report error if there are non-trapping sanitizers that require
+  // c++abi-specific  parts of UBSan runtime, and they are not provided by the
+  // toolchain. We don't have a good way to check the latter, so we just
+  // check if the toolchan supports vptr.
+  if (~Supported & Vptr) {
+    if (SanitizerMask KindsToDiagnose =
+            Kinds & ~TrappingKinds & NeedsUbsanCxxRt) {
+      SanitizerSet S;
+      S.Mask = KindsToDiagnose;
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << ("-fno-sanitize-trap=" + toString(S)) << TC.getTriple().str();
+      Kinds &= ~KindsToDiagnose;
+    }
+  }
+
   // Warn about incompatible groups of sanitizers.
   std::pair<SanitizerMask, SanitizerMask> IncompatibleGroups[] = {
       std::make_pair(Address, Thread), std::make_pair(Address, Memory),
@@ -517,8 +534,9 @@ static std::string toString(const clang::SanitizerSet &Sanitizers) {
   return Res;
 }
 
-void SanitizerArgs::addArgs(const llvm::opt::ArgList &Args,
-                            llvm::opt::ArgStringList &CmdArgs) const {
+void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
+                            llvm::opt::ArgStringList &CmdArgs,
+                            types::ID InputType) const {
   if (Sanitizers.empty())
     return;
   CmdArgs.push_back(Args.MakeArgString("-fsanitize=" + toString(Sanitizers)));
@@ -565,6 +583,17 @@ void SanitizerArgs::addArgs(const llvm::opt::ArgList &Args,
   // affect compilation.
   if (Sanitizers.has(Memory) || Sanitizers.has(Address))
     CmdArgs.push_back(Args.MakeArgString("-fno-assume-sane-operator-new"));
+
+  if (TC.getTriple().isOSWindows() && needsUbsanRt()) {
+    // Instruct the code generator to embed linker directives in the object file
+    // that cause the required runtime libraries to be linked.
+    CmdArgs.push_back(Args.MakeArgString(
+        "--dependent-lib=" + tools::getCompilerRT(TC, "ubsan_standalone")));
+    if (types::isCXX(InputType))
+      CmdArgs.push_back(
+          Args.MakeArgString("--dependent-lib=" +
+                             tools::getCompilerRT(TC, "ubsan_standalone_cxx")));
+  }
 }
 
 SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp
index 987e063..df74b41 100644
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@@ -39,9 +39,8 @@ using namespace clang::driver::toolchains;
 using namespace clang;
 using namespace llvm::opt;
 
-MachO::MachO(const Driver &D, const llvm::Triple &Triple,
-                       const ArgList &Args)
-  : ToolChain(D, Triple, Args) {
+MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : ToolChain(D, Triple, Args) {
   // We expect 'as', 'ld', etc. to be adjacent to our install dir.
   getProgramPaths().push_back(getDriver().getInstalledDir());
   if (getDriver().getInstalledDir() != getDriver().Dir)
@@ -62,9 +61,7 @@ types::ID MachO::LookupTypeForExtension(const char *Ext) const {
   return Ty;
 }
 
-bool MachO::HasNativeLLVMSupport() const {
-  return true;
-}
+bool MachO::HasNativeLLVMSupport() const { return true; }
 
 /// Darwin provides an ARC runtime starting in MacOS X 10.7 and iOS 5.0.
 ObjCRuntime Darwin::getDefaultObjCRuntime(bool isNonFragile) const {
@@ -90,20 +87,20 @@ bool Darwin::hasBlocksRuntime() const {
 // other assumptions. Maybe MachO should consider standardising
 // their nomenclature.
 static const char *ArmMachOArchName(StringRef Arch) {
-  return llvm::StringSwitch<const char*>(Arch)
-    .Case("armv6k", "armv6")
-    .Case("armv6m", "armv6m")
-    .Case("armv5tej", "armv5")
-    .Case("xscale", "xscale")
-    .Case("armv4t", "armv4t")
-    .Case("armv7", "armv7")
-    .Cases("armv7a", "armv7-a", "armv7")
-    .Cases("armv7r", "armv7-r", "armv7")
-    .Cases("armv7em", "armv7e-m", "armv7em")
-    .Cases("armv7k", "armv7-k", "armv7k")
-    .Cases("armv7m", "armv7-m", "armv7m")
-    .Cases("armv7s", "armv7-s", "armv7s")
-    .Default(nullptr);
+  return llvm::StringSwitch<const char *>(Arch)
+      .Case("armv6k", "armv6")
+      .Case("armv6m", "armv6m")
+      .Case("armv5tej", "armv5")
+      .Case("xscale", "xscale")
+      .Case("armv4t", "armv4t")
+      .Case("armv7", "armv7")
+      .Cases("armv7a", "armv7-a", "armv7")
+      .Cases("armv7r", "armv7-r", "armv7")
+      .Cases("armv7em", "armv7e-m", "armv7em")
+      .Cases("armv7k", "armv7-k", "armv7k")
+      .Cases("armv7m", "armv7-m", "armv7m")
+      .Cases("armv7s", "armv7-s", "armv7s")
+      .Default(nullptr);
 }
 
 static const char *ArmMachOArchNameCPU(StringRef CPU) {
@@ -159,15 +156,12 @@ StringRef MachO::getMachOArchName(const ArgList &Args) const {
   }
 }
 
-Darwin::~Darwin() {
-}
-
-MachO::~MachO() {
-}
+Darwin::~Darwin() {}
 
+MachO::~MachO() {}
 
 std::string MachO::ComputeEffectiveClangTriple(const ArgList &Args,
-                                                    types::ID InputType) const {
+                                               types::ID InputType) const {
   llvm::Triple Triple(ComputeLLVMTriple(Args, InputType));
 
   return Triple.getTriple();
@@ -211,18 +205,15 @@ Tool *MachO::getTool(Action::ActionClass AC) const {
   }
 }
 
-Tool *MachO::buildLinker() const {
-  return new tools::darwin::Link(*this);
-}
+Tool *MachO::buildLinker() const { return new tools::darwin::Linker(*this); }
 
 Tool *MachO::buildAssembler() const {
-  return new tools::darwin::Assemble(*this);
+  return new tools::darwin::Assembler(*this);
 }
 
-DarwinClang::DarwinClang(const Driver &D, const llvm::Triple& Triple,
+DarwinClang::DarwinClang(const Driver &D, const llvm::Triple &Triple,
                          const ArgList &Args)
-  : Darwin(D, Triple, Args) {
-}
+    : Darwin(D, Triple, Args) {}
 
 void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const {
   // For iOS, 64-bit, promote certain warnings to errors.
@@ -308,7 +299,7 @@ void MachO::AddLinkRuntimeLib(const ArgList &Args, ArgStringList &CmdArgs,
 }
 
 void Darwin::addProfileRTLibs(const ArgList &Args,
-                             ArgStringList &CmdArgs) const {
+                              ArgStringList &CmdArgs) const {
   if (!(Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs,
                      false) ||
         Args.hasArg(options::OPT_fprofile_generate) ||
@@ -337,10 +328,11 @@ void DarwinClang::AddLinkSanitizerLibArgs(const ArgList &Args,
   }
   assert(isTargetMacOS() || isTargetIOSSimulator());
   StringRef OS = isTargetMacOS() ? "osx" : "iossim";
-  AddLinkRuntimeLib(Args, CmdArgs, (Twine("libclang_rt.") + Sanitizer + "_" +
-                                    OS + "_dynamic.dylib").str(),
-                    /*AlwaysLink*/ true, /*IsEmbedded*/ false,
-                    /*AddRPath*/ true);
+  AddLinkRuntimeLib(
+      Args, CmdArgs,
+      (Twine("libclang_rt.") + Sanitizer + "_" + OS + "_dynamic.dylib").str(),
+      /*AlwaysLink*/ true, /*IsEmbedded*/ false,
+      /*AddRPath*/ true);
 
   if (GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) {
     // Add explicit dependcy on -lc++abi, as -lc++ doesn't re-export
@@ -357,7 +349,7 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
     break;
   default:
     getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
-      << Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "darwin";
+        << Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "darwin";
     return;
   }
 
@@ -372,12 +364,10 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
   // cares. This is useful in situations where someone wants to statically link
   // something like libstdc++, and needs its runtime support routines.
   if (const Arg *A = Args.getLastArg(options::OPT_static_libgcc)) {
-    getDriver().Diag(diag::err_drv_unsupported_opt)
-      << A->getAsString(Args);
+    getDriver().Diag(diag::err_drv_unsupported_opt) << A->getAsString(Args);
     return;
   }
 
-
   const SanitizerArgs &Sanitize = getSanitizerArgs();
   if (Sanitize.needsAsanRt())
     AddLinkSanitizerLibArgs(Args, CmdArgs, "asan");
@@ -443,7 +433,7 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
       if (llvm::sys::path::is_absolute(env) && llvm::sys::fs::exists(env) &&
           StringRef(env) != "/") {
         Args.append(Args.MakeSeparateArg(
-                      nullptr, Opts.getOption(options::OPT_isysroot), env));
+            nullptr, Opts.getOption(options::OPT_isysroot), env));
       }
     }
   }
@@ -453,8 +443,7 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
 
   if (OSXVersion && iOSVersion) {
     getDriver().Diag(diag::err_drv_argument_not_allowed_with)
-          << OSXVersion->getAsString(Args)
-          << iOSVersion->getAsString(Args);
+        << OSXVersion->getAsString(Args) << iOSVersion->getAsString(Args);
     iOSVersion = nullptr;
   } else if (!OSXVersion && !iOSVersion) {
     // If no deployment target was specified on the command line, check for
@@ -466,16 +455,31 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
     if (char *env = ::getenv("IPHONEOS_DEPLOYMENT_TARGET"))
       iOSTarget = env;
 
-    // If no '-miphoneos-version-min' specified on the command line and
-    // IPHONEOS_DEPLOYMENT_TARGET is not defined, see if we can set the default
-    // based on -isysroot.
-    if (iOSTarget.empty()) {
+    // If there is no command-line argument to specify the Target version and
+    // no environment variable defined, see if we can set the default based
+    // on -isysroot.
+    if (iOSTarget.empty() && OSXTarget.empty() &&
+        Args.hasArg(options::OPT_isysroot)) {
       if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) {
-        StringRef first, second;
         StringRef isysroot = A->getValue();
-        std::tie(first, second) = isysroot.split(StringRef("SDKs/iPhoneOS"));
-        if (second != "")
-          iOSTarget = second.substr(0,3);
+        // Assume SDK has path: SOME_PATH/SDKs/PlatformXX.YY.sdk
+        size_t BeginSDK = isysroot.rfind("SDKs/");
+        size_t EndSDK = isysroot.rfind(".sdk");
+        if (BeginSDK != StringRef::npos && EndSDK != StringRef::npos) {
+          StringRef SDK = isysroot.slice(BeginSDK + 5, EndSDK);
+          // Slice the version number out.
+          // Version number is between the first and the last number.
+          size_t StartVer = SDK.find_first_of("0123456789");
+          size_t EndVer = SDK.find_last_of("0123456789");
+          if (StartVer != StringRef::npos && EndVer > StartVer) {
+            StringRef Version = SDK.slice(StartVer, EndVer + 1);
+            if (SDK.startswith("iPhoneOS") ||
+                SDK.startswith("iPhoneSimulator"))
+              iOSTarget = Version;
+            else if (SDK.startswith("MacOSX"))
+              OSXTarget = Version;
+          }
+        }
       }
     }
 
@@ -535,18 +539,18 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
   bool HadExtra;
   if (Platform == MacOS) {
     assert(!iOSVersion && "Unknown target platform!");
-    if (!Driver::GetReleaseVersion(OSXVersion->getValue(), Major, Minor,
-                                   Micro, HadExtra) || HadExtra ||
-        Major != 10 || Minor >= 100 || Micro >= 100)
+    if (!Driver::GetReleaseVersion(OSXVersion->getValue(), Major, Minor, Micro,
+                                   HadExtra) ||
+        HadExtra || Major != 10 || Minor >= 100 || Micro >= 100)
       getDriver().Diag(diag::err_drv_invalid_version_number)
-        << OSXVersion->getAsString(Args);
+          << OSXVersion->getAsString(Args);
   } else if (Platform == IPhoneOS) {
     assert(iOSVersion && "Unknown target platform!");
-    if (!Driver::GetReleaseVersion(iOSVersion->getValue(), Major, Minor,
-                                   Micro, HadExtra) || HadExtra ||
-        Major >= 10 || Minor >= 100 || Micro >= 100)
+    if (!Driver::GetReleaseVersion(iOSVersion->getValue(), Major, Minor, Micro,
+                                   HadExtra) ||
+        HadExtra || Major >= 10 || Minor >= 100 || Micro >= 100)
       getDriver().Diag(diag::err_drv_invalid_version_number)
-        << iOSVersion->getAsString(Args);
+          << iOSVersion->getAsString(Args);
   } else
     llvm_unreachable("unknown kind of Darwin platform");
 
@@ -646,10 +650,11 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
       // Skip this argument unless the architecture matches either the toolchain
       // triple arch, or the arch being bound.
       llvm::Triple::ArchType XarchArch =
-        tools::darwin::getArchTypeForMachOArchName(A->getValue(0));
-      if (!(XarchArch == getArch()  ||
-            (BoundArch && XarchArch ==
-             tools::darwin::getArchTypeForMachOArchName(BoundArch))))
+          tools::darwin::getArchTypeForMachOArchName(A->getValue(0));
+      if (!(XarchArch == getArch() ||
+            (BoundArch &&
+             XarchArch ==
+                 tools::darwin::getArchTypeForMachOArchName(BoundArch))))
         continue;
 
       Arg *OriginalArg = A;
@@ -667,11 +672,11 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
       // like -O4 are going to slip through.
       if (!XarchArg || Index > Prev + 1) {
         getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
-          << A->getAsString(Args);
+            << A->getAsString(Args);
         continue;
       } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
         getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
-          << A->getAsString(Args);
+            << A->getAsString(Args);
         continue;
       }
 
@@ -685,11 +690,9 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
       // "input arguments".
       if (A->getOption().hasFlag(options::LinkerInput)) {
         // Convert the argument into individual Zlinker_input_args.
-        for (unsigned i = 0, e = A->getNumValues(); i != e; ++i) {
-          DAL->AddSeparateArg(OriginalArg,
-                              Opts.getOption(options::OPT_Zlinker_input),
-                              A->getValue(i));
-
+        for (const char *Value : A->getValues()) {
+          DAL->AddSeparateArg(
+              OriginalArg, Opts.getOption(options::OPT_Zlinker_input), Value);
         }
         continue;
       }
@@ -698,7 +701,7 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
     // Sob. These is strictly gcc compatible for the time being. Apple
     // gcc translates options twice, which means that self-expanding
     // options add duplicates.
-    switch ((options::ID) A->getOption().getID()) {
+    switch ((options::ID)A->getOption().getID()) {
     default:
       DAL->append(A);
       break;
@@ -710,20 +713,19 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
       break;
 
     case options::OPT_dependency_file:
-      DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF),
-                          A->getValue());
+      DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF), A->getValue());
       break;
 
     case options::OPT_gfull:
       DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
-      DAL->AddFlagArg(A,
-               Opts.getOption(options::OPT_fno_eliminate_unused_debug_symbols));
+      DAL->AddFlagArg(
+          A, Opts.getOption(options::OPT_fno_eliminate_unused_debug_symbols));
       break;
 
     case options::OPT_gused:
       DAL->AddFlagArg(A, Opts.getOption(options::OPT_g_Flag));
-      DAL->AddFlagArg(A,
-             Opts.getOption(options::OPT_feliminate_unused_debug_symbols));
+      DAL->AddFlagArg(
+          A, Opts.getOption(options::OPT_feliminate_unused_debug_symbols));
       break;
 
     case options::OPT_shared:
@@ -744,8 +746,8 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args,
       break;
 
     case options::OPT_Wno_nonportable_cfstrings:
-      DAL->AddFlagArg(A,
-                   Opts.getOption(options::OPT_mno_warn_nonportable_cfstrings));
+      DAL->AddFlagArg(
+          A, Opts.getOption(options::OPT_mno_warn_nonportable_cfstrings));
       break;
 
     case options::OPT_fpascal_strings:
@@ -861,7 +863,6 @@ void MachO::AddLinkRuntimeLibArgs(const ArgList &Args,
   AddLinkRuntimeLib(Args, CmdArgs, CompilerRT, false, true);
 }
 
-
 DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args,
                                       const char *BoundArch) const {
   // First get the generic Apple args, before moving onto Darwin-specific ones.
@@ -882,7 +883,7 @@ DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args,
   // but we can't check the deployment target in the translation code until
   // it is set here.
   if (isTargetIOSBased() && !isIPhoneOSVersionLT(6, 0)) {
-    for (ArgList::iterator it = DAL->begin(), ie = DAL->end(); it != ie; ) {
+    for (ArgList::iterator it = DAL->begin(), ie = DAL->end(); it != ie;) {
       Arg *A = *it;
       ++it;
       if (A->getOption().getID() != options::OPT_mkernel &&
@@ -914,8 +915,7 @@ DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args,
       where = "iOS 5.0";
 
     if (where != StringRef()) {
-      getDriver().Diag(clang::diag::err_drv_invalid_libcxx_deployment)
-        << where;
+      getDriver().Diag(clang::diag::err_drv_invalid_libcxx_deployment) << where;
     }
   }
 
@@ -938,13 +938,9 @@ bool Darwin::UseSjLjExceptions() const {
           getTriple().getArch() == llvm::Triple::thumb);
 }
 
-bool MachO::isPICDefault() const {
-  return true;
-}
+bool MachO::isPICDefault() const { return true; }
 
-bool MachO::isPIEDefault() const {
-  return false;
-}
+bool MachO::isPIEDefault() const { return false; }
 
 bool MachO::isPICDefaultForced() const {
   return (getArch() == llvm::Triple::x86_64 ||
@@ -1058,25 +1054,23 @@ void Darwin::addStartObjectFileArgs(const ArgList &Args,
   }
 }
 
-bool Darwin::SupportsObjCGC() const {
-  return isTargetMacOS();
-}
+bool Darwin::SupportsObjCGC() const { return isTargetMacOS(); }
 
 void Darwin::CheckObjCARC() const {
-  if (isTargetIOSBased()|| (isTargetMacOS() && !isMacosxVersionLT(10, 6)))
+  if (isTargetIOSBased() || (isTargetMacOS() && !isMacosxVersionLT(10, 6)))
     return;
   getDriver().Diag(diag::err_arc_unsupported_on_toolchain);
 }
 
 SanitizerMask Darwin::getSupportedSanitizers() const {
   SanitizerMask Res = ToolChain::getSupportedSanitizers();
-  if (isTargetMacOS() || isTargetIOSSimulator()) {
-    // ASan and UBSan are available on Mac OS and on iOS simulator.
+  if (isTargetMacOS() || isTargetIOSSimulator())
     Res |= SanitizerKind::Address;
-    Res |= SanitizerKind::Vptr;
-  }
-  if (isTargetMacOS())
+  if (isTargetMacOS()) {
+    if (!isMacosxVersionLT(10, 9))
+      Res |= SanitizerKind::Vptr;
     Res |= SanitizerKind::SafeStack;
+  }
   return Res;
 }
 
@@ -1089,17 +1083,15 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
 /// This is the primary means of forming GCCVersion objects.
 /*static*/
 Generic_GCC::GCCVersion Linux::GCCVersion::Parse(StringRef VersionText) {
-  const GCCVersion BadVersion = { VersionText.str(), -1, -1, -1, "", "", "" };
+  const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
   std::pair<StringRef, StringRef> First = VersionText.split('.');
   std::pair<StringRef, StringRef> Second = First.second.split('.');
 
-  GCCVersion GoodVersion = { VersionText.str(), -1, -1, -1, "", "", "" };
-  if (First.first.getAsInteger(10, GoodVersion.Major) ||
-      GoodVersion.Major < 0)
+  GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
+  if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0)
     return BadVersion;
   GoodVersion.MajorStr = First.first.str();
-  if (Second.first.getAsInteger(10, GoodVersion.Minor) ||
-      GoodVersion.Minor < 0)
+  if (Second.first.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0)
     return BadVersion;
   GoodVersion.MinorStr = Second.first.str();
 
@@ -1176,12 +1168,11 @@ static llvm::StringRef getGCCToolchainDir(const ArgList &Args) {
 /// should instead pull the target out of the driver. This is currently
 /// necessary because the driver doesn't store the final version of the target
 /// triple.
-void
-Generic_GCC::GCCInstallationDetector::init(
+void Generic_GCC::GCCInstallationDetector::init(
     const Driver &D, const llvm::Triple &TargetTriple, const ArgList &Args) {
-  llvm::Triple BiarchVariantTriple =
-      TargetTriple.isArch32Bit() ? TargetTriple.get64BitArchVariant()
-                                 : TargetTriple.get32BitArchVariant();
+  llvm::Triple BiarchVariantTriple = TargetTriple.isArch32Bit()
+                                         ? TargetTriple.get64BitArchVariant()
+                                         : TargetTriple.get32BitArchVariant();
   // The library directories which may contain GCC installations.
   SmallVector<StringRef, 4> CandidateLibDirs, CandidateBiarchLibDirs;
   // The compatible GCC triples for this particular architecture.
@@ -1219,25 +1210,22 @@ Generic_GCC::GCCInstallationDetector::init(
   // Loop over the various components which exist and select the best GCC
   // installation available. GCC installs are ranked by version number.
   Version = GCCVersion::Parse("0.0.0");
-  for (unsigned i = 0, ie = Prefixes.size(); i < ie; ++i) {
-    if (!llvm::sys::fs::exists(Prefixes[i]))
+  for (const std::string &Prefix : Prefixes) {
+    if (!llvm::sys::fs::exists(Prefix))
       continue;
-    for (unsigned j = 0, je = CandidateLibDirs.size(); j < je; ++j) {
-      const std::string LibDir = Prefixes[i] + CandidateLibDirs[j].str();
+    for (const StringRef Suffix : CandidateLibDirs) {
+      const std::string LibDir = Prefix + Suffix.str();
       if (!llvm::sys::fs::exists(LibDir))
         continue;
-      for (unsigned k = 0, ke = CandidateTripleAliases.size(); k < ke; ++k)
-        ScanLibDirForGCCTriple(TargetTriple, Args, LibDir,
-                               CandidateTripleAliases[k]);
+      for (const StringRef Candidate : CandidateTripleAliases)
+        ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate);
     }
-    for (unsigned j = 0, je = CandidateBiarchLibDirs.size(); j < je; ++j) {
-      const std::string LibDir = Prefixes[i] + CandidateBiarchLibDirs[j].str();
+    for (const StringRef Suffix : CandidateBiarchLibDirs) {
+      const std::string LibDir = Prefix + Suffix.str();
       if (!llvm::sys::fs::exists(LibDir))
         continue;
-      for (unsigned k = 0, ke = CandidateBiarchTripleAliases.size(); k < ke;
-           ++k)
-        ScanLibDirForGCCTriple(TargetTriple, Args, LibDir,
-                               CandidateBiarchTripleAliases[k],
+      for (const StringRef Candidate : CandidateBiarchTripleAliases)
+        ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate,
                                /*NeedsBiarchSuffix=*/ true);
     }
   }
@@ -1274,91 +1262,82 @@ bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const {
   // Declare a bunch of static data sets that we'll select between below. These
   // are specifically designed to always refer to string literals to avoid any
   // lifetime or initialization issues.
-  static const char *const AArch64LibDirs[] = { "/lib64", "/lib" };
-  static const char *const AArch64Triples[] = { "aarch64-none-linux-gnu",
-                                                "aarch64-linux-gnu",
-                                                "aarch64-linux-android",
-                                                "aarch64-redhat-linux" };
-  static const char *const AArch64beLibDirs[] = { "/lib" };
-  static const char *const AArch64beTriples[] = { "aarch64_be-none-linux-gnu",
-                                                  "aarch64_be-linux-gnu" };
-
-  static const char *const ARMLibDirs[] = { "/lib" };
-  static const char *const ARMTriples[] = { "arm-linux-gnueabi",
-                                            "arm-linux-androideabi" };
-  static const char *const ARMHFTriples[] = { "arm-linux-gnueabihf",
-                                              "armv7hl-redhat-linux-gnueabi" };
-  static const char *const ARMebLibDirs[] = { "/lib" };
-  static const char *const ARMebTriples[] = { "armeb-linux-gnueabi",
-                                              "armeb-linux-androideabi" };
-  static const char *const ARMebHFTriples[] = { "armeb-linux-gnueabihf",
-                                                "armebv7hl-redhat-linux-gnueabi" };
-
-  static const char *const X86_64LibDirs[] = { "/lib64", "/lib" };
+  static const char *const AArch64LibDirs[] = {"/lib64", "/lib"};
+  static const char *const AArch64Triples[] = {
+      "aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-linux-android",
+      "aarch64-redhat-linux"};
+  static const char *const AArch64beLibDirs[] = {"/lib"};
+  static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu",
+                                                 "aarch64_be-linux-gnu"};
+
+  static const char *const ARMLibDirs[] = {"/lib"};
+  static const char *const ARMTriples[] = {"arm-linux-gnueabi",
+                                           "arm-linux-androideabi"};
+  static const char *const ARMHFTriples[] = {"arm-linux-gnueabihf",
+                                             "armv7hl-redhat-linux-gnueabi"};
+  static const char *const ARMebLibDirs[] = {"/lib"};
+  static const char *const ARMebTriples[] = {"armeb-linux-gnueabi",
+                                             "armeb-linux-androideabi"};
+  static const char *const ARMebHFTriples[] = {
+      "armeb-linux-gnueabihf", "armebv7hl-redhat-linux-gnueabi"};
+
+  static const char *const X86_64LibDirs[] = {"/lib64", "/lib"};
   static const char *const X86_64Triples[] = {
-    "x86_64-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-pc-linux-gnu",
-    "x86_64-redhat-linux6E", "x86_64-redhat-linux", "x86_64-suse-linux",
-    "x86_64-manbo-linux-gnu", "x86_64-linux-gnu", "x86_64-slackware-linux",
-    "x86_64-linux-android", "x86_64-unknown-linux"
-  };
-  static const char *const X32LibDirs[] = { "/libx32" };
-  static const char *const X86LibDirs[] = { "/lib32", "/lib" };
+      "x86_64-linux-gnu",       "x86_64-unknown-linux-gnu",
+      "x86_64-pc-linux-gnu",    "x86_64-redhat-linux6E",
+      "x86_64-redhat-linux",    "x86_64-suse-linux",
+      "x86_64-manbo-linux-gnu", "x86_64-linux-gnu",
+      "x86_64-slackware-linux", "x86_64-linux-android",
+      "x86_64-unknown-linux"};
+  static const char *const X32LibDirs[] = {"/libx32"};
+  static const char *const X86LibDirs[] = {"/lib32", "/lib"};
   static const char *const X86Triples[] = {
-    "i686-linux-gnu", "i686-pc-linux-gnu", "i486-linux-gnu", "i386-linux-gnu",
-    "i386-redhat-linux6E", "i686-redhat-linux", "i586-redhat-linux",
-    "i386-redhat-linux", "i586-suse-linux", "i486-slackware-linux",
-    "i686-montavista-linux", "i686-linux-android", "i586-linux-gnu"
-  };
-
-  static const char *const MIPSLibDirs[] = { "/lib" };
-  static const char *const MIPSTriples[] = { "mips-linux-gnu",
-                                             "mips-mti-linux-gnu",
-                                             "mips-img-linux-gnu" };
-  static const char *const MIPSELLibDirs[] = { "/lib" };
-  static const char *const MIPSELTriples[] = { "mipsel-linux-gnu",
-                                               "mipsel-linux-android",
-                                               "mips-img-linux-gnu" };
-
-  static const char *const MIPS64LibDirs[] = { "/lib64", "/lib" };
-  static const char *const MIPS64Triples[] = { "mips64-linux-gnu",
-                                               "mips-mti-linux-gnu",
-                                               "mips-img-linux-gnu",
-                                               "mips64-linux-gnuabi64" };
-  static const char *const MIPS64ELLibDirs[] = { "/lib64", "/lib" };
-  static const char *const MIPS64ELTriples[] = { "mips64el-linux-gnu",
-                                                 "mips-mti-linux-gnu",
-                                                 "mips-img-linux-gnu",
-                                                 "mips64el-linux-android",
-                                                 "mips64el-linux-gnuabi64" };
-
-  static const char *const PPCLibDirs[] = { "/lib32", "/lib" };
+      "i686-linux-gnu",       "i686-pc-linux-gnu",     "i486-linux-gnu",
+      "i386-linux-gnu",       "i386-redhat-linux6E",   "i686-redhat-linux",
+      "i586-redhat-linux",    "i386-redhat-linux",     "i586-suse-linux",
+      "i486-slackware-linux", "i686-montavista-linux", "i686-linux-android",
+      "i586-linux-gnu"};
+
+  static const char *const MIPSLibDirs[] = {"/lib"};
+  static const char *const MIPSTriples[] = {
+      "mips-linux-gnu", "mips-mti-linux-gnu", "mips-img-linux-gnu"};
+  static const char *const MIPSELLibDirs[] = {"/lib"};
+  static const char *const MIPSELTriples[] = {
+      "mipsel-linux-gnu", "mipsel-linux-android", "mips-img-linux-gnu"};
+
+  static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"};
+  static const char *const MIPS64Triples[] = {
+      "mips64-linux-gnu", "mips-mti-linux-gnu", "mips-img-linux-gnu",
+      "mips64-linux-gnuabi64"};
+  static const char *const MIPS64ELLibDirs[] = {"/lib64", "/lib"};
+  static const char *const MIPS64ELTriples[] = {
+      "mips64el-linux-gnu", "mips-mti-linux-gnu", "mips-img-linux-gnu",
+      "mips64el-linux-android", "mips64el-linux-gnuabi64"};
+
+  static const char *const PPCLibDirs[] = {"/lib32", "/lib"};
   static const char *const PPCTriples[] = {
-    "powerpc-linux-gnu", "powerpc-unknown-linux-gnu", "powerpc-linux-gnuspe",
-    "powerpc-suse-linux", "powerpc-montavista-linuxspe"
-  };
-  static const char *const PPC64LibDirs[] = { "/lib64", "/lib" };
-  static const char *const PPC64Triples[] = { "powerpc64-linux-gnu",
-                                              "powerpc64-unknown-linux-gnu",
-                                              "powerpc64-suse-linux",
-                                              "ppc64-redhat-linux" };
-  static const char *const PPC64LELibDirs[] = { "/lib64", "/lib" };
-  static const char *const PPC64LETriples[] = { "powerpc64le-linux-gnu",
-                                                "powerpc64le-unknown-linux-gnu",
-                                                "powerpc64le-suse-linux",
-                                                "ppc64le-redhat-linux" };
-
-  static const char *const SPARCv8LibDirs[] = { "/lib32", "/lib" };
-  static const char *const SPARCv8Triples[] = { "sparc-linux-gnu",
-                                                "sparcv8-linux-gnu" };
-  static const char *const SPARCv9LibDirs[] = { "/lib64", "/lib" };
-  static const char *const SPARCv9Triples[] = { "sparc64-linux-gnu",
-                                                "sparcv9-linux-gnu" };
-
-  static const char *const SystemZLibDirs[] = { "/lib64", "/lib" };
+      "powerpc-linux-gnu", "powerpc-unknown-linux-gnu", "powerpc-linux-gnuspe",
+      "powerpc-suse-linux", "powerpc-montavista-linuxspe"};
+  static const char *const PPC64LibDirs[] = {"/lib64", "/lib"};
+  static const char *const PPC64Triples[] = {
+      "powerpc64-linux-gnu", "powerpc64-unknown-linux-gnu",
+      "powerpc64-suse-linux", "ppc64-redhat-linux"};
+  static const char *const PPC64LELibDirs[] = {"/lib64", "/lib"};
+  static const char *const PPC64LETriples[] = {
+      "powerpc64le-linux-gnu", "powerpc64le-unknown-linux-gnu",
+      "powerpc64le-suse-linux", "ppc64le-redhat-linux"};
+
+  static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"};
+  static const char *const SPARCv8Triples[] = {"sparc-linux-gnu",
+                                               "sparcv8-linux-gnu"};
+  static const char *const SPARCv9LibDirs[] = {"/lib64", "/lib"};
+  static const char *const SPARCv9Triples[] = {"sparc64-linux-gnu",
+                                               "sparcv9-linux-gnu"};
+
+  static const char *const SystemZLibDirs[] = {"/lib64", "/lib"};
   static const char *const SystemZTriples[] = {
-    "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu",
-    "s390x-suse-linux", "s390x-redhat-linux"
-  };
+      "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu",
+      "s390x-suse-linux", "s390x-redhat-linux"};
 
   using std::begin;
   using std::end;
@@ -1526,14 +1505,12 @@ static bool isMipsEL(llvm::Triple::ArchType Arch) {
 }
 
 static bool isMips16(const ArgList &Args) {
-  Arg *A = Args.getLastArg(options::OPT_mips16,
-                           options::OPT_mno_mips16);
+  Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16);
   return A && A->getOption().matches(options::OPT_mips16);
 }
 
 static bool isMicroMips(const ArgList &Args) {
-  Arg *A = Args.getLastArg(options::OPT_mmicromips,
-                           options::OPT_mno_micromips);
+  Arg *A = Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips);
   return A && A->getOption().matches(options::OPT_mmicromips);
 }
 
@@ -1554,8 +1531,7 @@ static Multilib makeMultilib(StringRef commonSuffix) {
 }
 
 static bool findMIPSMultilibs(const llvm::Triple &TargetTriple, StringRef Path,
-                              const ArgList &Args,
-                              DetectedMultilibs &Result) {
+                              const ArgList &Args, DetectedMultilibs &Result) {
   // Some MIPS toolchains put libraries and object files compiled
   // using different options in to the sub-directoris which names
   // reflects the flags used for compilation. For example sysroot
@@ -1587,181 +1563,182 @@ static bool findMIPSMultilibs(const llvm::Triple &TargetTriple, StringRef Path,
   MultilibSet FSFMipsMultilibs;
   {
     auto MArchMips32 = makeMultilib("/mips32")
-      .flag("+m32").flag("-m64").flag("-mmicromips").flag("+march=mips32");
+                           .flag("+m32")
+                           .flag("-m64")
+                           .flag("-mmicromips")
+                           .flag("+march=mips32");
 
     auto MArchMicroMips = makeMultilib("/micromips")
-      .flag("+m32").flag("-m64").flag("+mmicromips");
+                              .flag("+m32")
+                              .flag("-m64")
+                              .flag("+mmicromips");
 
     auto MArchMips64r2 = makeMultilib("/mips64r2")
-      .flag("-m32").flag("+m64").flag("+march=mips64r2");
+                             .flag("-m32")
+                             .flag("+m64")
+                             .flag("+march=mips64r2");
 
-    auto MArchMips64 = makeMultilib("/mips64")
-      .flag("-m32").flag("+m64").flag("-march=mips64r2");
+    auto MArchMips64 = makeMultilib("/mips64").flag("-m32").flag("+m64").flag(
+        "-march=mips64r2");
 
     auto MArchDefault = makeMultilib("")
-      .flag("+m32").flag("-m64").flag("-mmicromips").flag("+march=mips32r2");
-
-    auto Mips16 = makeMultilib("/mips16")
-      .flag("+mips16");
-
-    auto UCLibc = makeMultilib("/uclibc")
-      .flag("+muclibc");
-
-    auto MAbi64 = makeMultilib("/64")
-      .flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
-
-    auto BigEndian = makeMultilib("")
-      .flag("+EB").flag("-EL");
-
-    auto LittleEndian = makeMultilib("/el")
-      .flag("+EL").flag("-EB");
-
-    auto SoftFloat = makeMultilib("/sof")
-      .flag("+msoft-float");
-
-    auto Nan2008 = makeMultilib("/nan2008")
-      .flag("+mnan=2008");
-
-    FSFMipsMultilibs = MultilibSet()
-      .Either(MArchMips32, MArchMicroMips, 
-              MArchMips64r2, MArchMips64, MArchDefault)
-      .Maybe(UCLibc)
-      .Maybe(Mips16)
-      .FilterOut("/mips64/mips16")
-      .FilterOut("/mips64r2/mips16")
-      .FilterOut("/micromips/mips16")
-      .Maybe(MAbi64)
-      .FilterOut("/micromips/64")
-      .FilterOut("/mips32/64")
-      .FilterOut("^/64")
-      .FilterOut("/mips16/64")
-      .Either(BigEndian, LittleEndian)
-      .Maybe(SoftFloat)
-      .Maybe(Nan2008)
-      .FilterOut(".*sof/nan2008")
-      .FilterOut(NonExistent)
-      .setIncludeDirsCallback([](
-          StringRef InstallDir, StringRef TripleStr, const Multilib &M) {
-        std::vector<std::string> Dirs;
-        Dirs.push_back((InstallDir + "/include").str());
-        std::string SysRootInc = InstallDir.str() + "/../../../../sysroot";
-        if (StringRef(M.includeSuffix()).startswith("/uclibc"))
-          Dirs.push_back(SysRootInc + "/uclibc/usr/include");
-        else
-          Dirs.push_back(SysRootInc + "/usr/include");
-        return Dirs;
-      });
+                            .flag("+m32")
+                            .flag("-m64")
+                            .flag("-mmicromips")
+                            .flag("+march=mips32r2");
+
+    auto Mips16 = makeMultilib("/mips16").flag("+mips16");
+
+    auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
+
+    auto MAbi64 =
+        makeMultilib("/64").flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
+
+    auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
+
+    auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
+
+    auto SoftFloat = makeMultilib("/sof").flag("+msoft-float");
+
+    auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
+
+    FSFMipsMultilibs =
+        MultilibSet()
+            .Either(MArchMips32, MArchMicroMips, MArchMips64r2, MArchMips64,
+                    MArchDefault)
+            .Maybe(UCLibc)
+            .Maybe(Mips16)
+            .FilterOut("/mips64/mips16")
+            .FilterOut("/mips64r2/mips16")
+            .FilterOut("/micromips/mips16")
+            .Maybe(MAbi64)
+            .FilterOut("/micromips/64")
+            .FilterOut("/mips32/64")
+            .FilterOut("^/64")
+            .FilterOut("/mips16/64")
+            .Either(BigEndian, LittleEndian)
+            .Maybe(SoftFloat)
+            .Maybe(Nan2008)
+            .FilterOut(".*sof/nan2008")
+            .FilterOut(NonExistent)
+            .setIncludeDirsCallback([](StringRef InstallDir,
+                                       StringRef TripleStr, const Multilib &M) {
+              std::vector<std::string> Dirs;
+              Dirs.push_back((InstallDir + "/include").str());
+              std::string SysRootInc =
+                  InstallDir.str() + "/../../../../sysroot";
+              if (StringRef(M.includeSuffix()).startswith("/uclibc"))
+                Dirs.push_back(SysRootInc + "/uclibc/usr/include");
+              else
+                Dirs.push_back(SysRootInc + "/usr/include");
+              return Dirs;
+            });
   }
 
   // Check for Code Sourcery toolchain multilibs
   MultilibSet CSMipsMultilibs;
   {
-    auto MArchMips16 = makeMultilib("/mips16")
-      .flag("+m32").flag("+mips16");
+    auto MArchMips16 = makeMultilib("/mips16").flag("+m32").flag("+mips16");
 
-    auto MArchMicroMips = makeMultilib("/micromips")
-      .flag("+m32").flag("+mmicromips");
+    auto MArchMicroMips =
+        makeMultilib("/micromips").flag("+m32").flag("+mmicromips");
 
-    auto MArchDefault = makeMultilib("")
-      .flag("-mips16").flag("-mmicromips");
+    auto MArchDefault = makeMultilib("").flag("-mips16").flag("-mmicromips");
 
-    auto UCLibc = makeMultilib("/uclibc")
-      .flag("+muclibc");
+    auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
 
-    auto SoftFloat = makeMultilib("/soft-float")
-      .flag("+msoft-float");
+    auto SoftFloat = makeMultilib("/soft-float").flag("+msoft-float");
 
-    auto Nan2008 = makeMultilib("/nan2008")
-      .flag("+mnan=2008");
+    auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
 
-    auto DefaultFloat = makeMultilib("")
-      .flag("-msoft-float").flag("-mnan=2008");
+    auto DefaultFloat =
+        makeMultilib("").flag("-msoft-float").flag("-mnan=2008");
 
-    auto BigEndian = makeMultilib("")
-      .flag("+EB").flag("-EL");
+    auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
 
-    auto LittleEndian = makeMultilib("/el")
-      .flag("+EL").flag("-EB");
+    auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
 
     // Note that this one's osSuffix is ""
     auto MAbi64 = makeMultilib("")
-      .gccSuffix("/64")
-      .includeSuffix("/64")
-      .flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
-
-    CSMipsMultilibs = MultilibSet()
-      .Either(MArchMips16, MArchMicroMips, MArchDefault)
-      .Maybe(UCLibc)
-      .Either(SoftFloat, Nan2008, DefaultFloat)
-      .FilterOut("/micromips/nan2008")
-      .FilterOut("/mips16/nan2008")
-      .Either(BigEndian, LittleEndian)
-      .Maybe(MAbi64)
-      .FilterOut("/mips16.*/64")
-      .FilterOut("/micromips.*/64")
-      .FilterOut(NonExistent)
-      .setIncludeDirsCallback([](
-          StringRef InstallDir, StringRef TripleStr, const Multilib &M) {
-        std::vector<std::string> Dirs;
-        Dirs.push_back((InstallDir + "/include").str());
-        std::string SysRootInc =
-            InstallDir.str() + "/../../../../" + TripleStr.str();
-        if (StringRef(M.includeSuffix()).startswith("/uclibc"))
-          Dirs.push_back(SysRootInc + "/libc/uclibc/usr/include");
-        else
-          Dirs.push_back(SysRootInc + "/libc/usr/include");
-        return Dirs;
-      });
-  }
-
-  MultilibSet AndroidMipsMultilibs = MultilibSet()
-    .Maybe(Multilib("/mips-r2").flag("+march=mips32r2"))
-    .Maybe(Multilib("/mips-r6").flag("+march=mips32r6"))
-    .FilterOut(NonExistent);
+                      .gccSuffix("/64")
+                      .includeSuffix("/64")
+                      .flag("+mabi=n64")
+                      .flag("-mabi=n32")
+                      .flag("-m32");
+
+    CSMipsMultilibs =
+        MultilibSet()
+            .Either(MArchMips16, MArchMicroMips, MArchDefault)
+            .Maybe(UCLibc)
+            .Either(SoftFloat, Nan2008, DefaultFloat)
+            .FilterOut("/micromips/nan2008")
+            .FilterOut("/mips16/nan2008")
+            .Either(BigEndian, LittleEndian)
+            .Maybe(MAbi64)
+            .FilterOut("/mips16.*/64")
+            .FilterOut("/micromips.*/64")
+            .FilterOut(NonExistent)
+            .setIncludeDirsCallback([](StringRef InstallDir,
+                                       StringRef TripleStr, const Multilib &M) {
+              std::vector<std::string> Dirs;
+              Dirs.push_back((InstallDir + "/include").str());
+              std::string SysRootInc =
+                  InstallDir.str() + "/../../../../" + TripleStr.str();
+              if (StringRef(M.includeSuffix()).startswith("/uclibc"))
+                Dirs.push_back(SysRootInc + "/libc/uclibc/usr/include");
+              else
+                Dirs.push_back(SysRootInc + "/libc/usr/include");
+              return Dirs;
+            });
+  }
+
+  MultilibSet AndroidMipsMultilibs =
+      MultilibSet()
+          .Maybe(Multilib("/mips-r2").flag("+march=mips32r2"))
+          .Maybe(Multilib("/mips-r6").flag("+march=mips32r6"))
+          .FilterOut(NonExistent);
 
   MultilibSet DebianMipsMultilibs;
   {
-    Multilib MAbiN32 = Multilib()
-      .gccSuffix("/n32")
-      .includeSuffix("/n32")
-      .flag("+mabi=n32");
+    Multilib MAbiN32 =
+        Multilib().gccSuffix("/n32").includeSuffix("/n32").flag("+mabi=n32");
 
     Multilib M64 = Multilib()
-      .gccSuffix("/64")
-      .includeSuffix("/64")
-      .flag("+m64").flag("-m32").flag("-mabi=n32");
+                       .gccSuffix("/64")
+                       .includeSuffix("/64")
+                       .flag("+m64")
+                       .flag("-m32")
+                       .flag("-mabi=n32");
 
-    Multilib M32 = Multilib()
-      .flag("-m64").flag("+m32").flag("-mabi=n32");
+    Multilib M32 = Multilib().flag("-m64").flag("+m32").flag("-mabi=n32");
 
-    DebianMipsMultilibs = MultilibSet()
-      .Either(M32, M64, MAbiN32)
-      .FilterOut(NonExistent);
+    DebianMipsMultilibs =
+        MultilibSet().Either(M32, M64, MAbiN32).FilterOut(NonExistent);
   }
 
   MultilibSet ImgMultilibs;
   {
-    auto Mips64r6 = makeMultilib("/mips64r6")
-      .flag("+m64").flag("-m32");
+    auto Mips64r6 = makeMultilib("/mips64r6").flag("+m64").flag("-m32");
 
-    auto LittleEndian = makeMultilib("/el")
-      .flag("+EL").flag("-EB");
+    auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
 
-    auto MAbi64 = makeMultilib("/64")
-      .flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
+    auto MAbi64 =
+        makeMultilib("/64").flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
 
-    ImgMultilibs = MultilibSet()
-      .Maybe(Mips64r6)
-      .Maybe(MAbi64)
-      .Maybe(LittleEndian)
-      .FilterOut(NonExistent)
-      .setIncludeDirsCallback([](
-          StringRef InstallDir, StringRef TripleStr, const Multilib &M) {
-        std::vector<std::string> Dirs;
-        Dirs.push_back((InstallDir + "/include").str());
-        Dirs.push_back((InstallDir + "/../../../../sysroot/usr/include").str());
-        return Dirs;
-      });
+    ImgMultilibs =
+        MultilibSet()
+            .Maybe(Mips64r6)
+            .Maybe(MAbi64)
+            .Maybe(LittleEndian)
+            .FilterOut(NonExistent)
+            .setIncludeDirsCallback([](StringRef InstallDir,
+                                       StringRef TripleStr, const Multilib &M) {
+              std::vector<std::string> Dirs;
+              Dirs.push_back((InstallDir + "/include").str());
+              Dirs.push_back(
+                  (InstallDir + "/../../../../sysroot/usr/include").str());
+              return Dirs;
+            });
   }
 
   StringRef CPUName;
@@ -1816,8 +1793,8 @@ static bool findMIPSMultilibs(const llvm::Triple &TargetTriple, StringRef Path,
 
   // Sort candidates. Toolchain that best meets the directories goes first.
   // Then select the first toolchains matches command line flags.
-  MultilibSet *candidates[] = { &DebianMipsMultilibs, &FSFMipsMultilibs,
-                                &CSMipsMultilibs };
+  MultilibSet *candidates[] = {&DebianMipsMultilibs, &FSFMipsMultilibs,
+                               &CSMipsMultilibs};
   std::sort(
       std::begin(candidates), std::end(candidates),
       [](MultilibSet *a, MultilibSet *b) { return a->size() > b->size(); });
@@ -1859,17 +1836,23 @@ static bool findBiarchMultilibs(const llvm::Triple &TargetTriple,
 
   Multilib Default;
   Multilib Alt64 = Multilib()
-    .gccSuffix("/64")
-    .includeSuffix("/64")
-    .flag("-m32").flag("+m64").flag("-mx32");
+                       .gccSuffix("/64")
+                       .includeSuffix("/64")
+                       .flag("-m32")
+                       .flag("+m64")
+                       .flag("-mx32");
   Multilib Alt32 = Multilib()
-    .gccSuffix("/32")
-    .includeSuffix("/32")
-    .flag("+m32").flag("-m64").flag("-mx32");
+                       .gccSuffix("/32")
+                       .includeSuffix("/32")
+                       .flag("+m32")
+                       .flag("-m64")
+                       .flag("-mx32");
   Multilib Altx32 = Multilib()
-    .gccSuffix("/x32")
-    .includeSuffix("/x32")
-    .flag("-m32").flag("-m64").flag("+mx32");
+                        .gccSuffix("/x32")
+                        .includeSuffix("/x32")
+                        .flag("-m32")
+                        .flag("-m64")
+                        .flag("+mx32");
 
   FilterNonExistent NonExistent(Path);
 
@@ -1916,8 +1899,7 @@ static bool findBiarchMultilibs(const llvm::Triple &TargetTriple,
   if (!Result.Multilibs.select(Flags, Result.SelectedMultilib))
     return false;
 
-  if (Result.SelectedMultilib == Alt64 ||
-      Result.SelectedMultilib == Alt32 ||
+  if (Result.SelectedMultilib == Alt64 || Result.SelectedMultilib == Alt32 ||
       Result.SelectedMultilib == Altx32)
     Result.BiarchSibling = Default;
 
@@ -1933,27 +1915,26 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
   // check for. We also record what is necessary to walk from each back
   // up to the lib directory.
   const std::string LibSuffixes[] = {
-    "/gcc/" + CandidateTriple.str(),
-    // Debian puts cross-compilers in gcc-cross
-    "/gcc-cross/" + CandidateTriple.str(),
-    "/" + CandidateTriple.str() + "/gcc/" + CandidateTriple.str(),
-
-    // The Freescale PPC SDK has the gcc libraries in
-    // <sysroot>/usr/lib/<triple>/x.y.z so have a look there as well.
-    "/" + CandidateTriple.str(),
-
-    // Ubuntu has a strange mis-matched pair of triples that this happens to
-    // match.
-    // FIXME: It may be worthwhile to generalize this and look for a second
-    // triple.
-    "/i386-linux-gnu/gcc/" + CandidateTriple.str()
-  };
+      "/gcc/" + CandidateTriple.str(),
+      // Debian puts cross-compilers in gcc-cross
+      "/gcc-cross/" + CandidateTriple.str(),
+      "/" + CandidateTriple.str() + "/gcc/" + CandidateTriple.str(),
+
+      // The Freescale PPC SDK has the gcc libraries in
+      // <sysroot>/usr/lib/<triple>/x.y.z so have a look there as well.
+      "/" + CandidateTriple.str(),
+
+      // Ubuntu has a strange mis-matched pair of triples that this happens to
+      // match.
+      // FIXME: It may be worthwhile to generalize this and look for a second
+      // triple.
+      "/i386-linux-gnu/gcc/" + CandidateTriple.str()};
   const std::string InstallSuffixes[] = {
-    "/../../..",    // gcc/
-    "/../../..",    // gcc-cross/
-    "/../../../..", // <triple>/gcc/
-    "/../..",       // <triple>/
-    "/../../../.."  // i386-linux-gnu/gcc/<triple>/
+      "/../../..",    // gcc/
+      "/../../..",    // gcc-cross/
+      "/../../../..", // <triple>/gcc/
+      "/../..",       // <triple>/
+      "/../../../.."  // i386-linux-gnu/gcc/<triple>/
   };
   // Only look at the final, weird Ubuntu suffix for i386-linux-gnu.
   const unsigned NumLibSuffixes =
@@ -2000,26 +1981,25 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
   }
 }
 
-Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple& Triple,
+Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple,
                          const ArgList &Args)
-  : ToolChain(D, Triple, Args), GCCInstallation() {
+    : ToolChain(D, Triple, Args), GCCInstallation() {
   getProgramPaths().push_back(getDriver().getInstalledDir());
   if (getDriver().getInstalledDir() != getDriver().Dir)
     getProgramPaths().push_back(getDriver().Dir);
 }
 
-Generic_GCC::~Generic_GCC() {
-}
+Generic_GCC::~Generic_GCC() {}
 
 Tool *Generic_GCC::getTool(Action::ActionClass AC) const {
   switch (AC) {
   case Action::PreprocessJobClass:
     if (!Preprocess)
-      Preprocess.reset(new tools::gcc::Preprocess(*this));
+      Preprocess.reset(new tools::gcc::Preprocessor(*this));
     return Preprocess.get();
   case Action::CompileJobClass:
     if (!Compile)
-      Compile.reset(new tools::gcc::Compile(*this));
+      Compile.reset(new tools::gcc::Compiler(*this));
     return Compile.get();
   default:
     return ToolChain::getTool(AC);
@@ -2027,12 +2007,10 @@ Tool *Generic_GCC::getTool(Action::ActionClass AC) const {
 }
 
 Tool *Generic_GCC::buildAssembler() const {
-  return new tools::gnutools::Assemble(*this);
+  return new tools::gnutools::Assembler(*this);
 }
 
-Tool *Generic_GCC::buildLinker() const {
-  return new tools::gcc::Link(*this);
-}
+Tool *Generic_GCC::buildLinker() const { return new tools::gcc::Linker(*this); }
 
 void Generic_GCC::printVerboseInfo(raw_ostream &OS) const {
   // Print the information about how we detected the GCC installation.
@@ -2044,15 +2022,13 @@ bool Generic_GCC::IsUnwindTablesDefault() const {
 }
 
 bool Generic_GCC::isPICDefault() const {
-  return false;
+  return getArch() == llvm::Triple::x86_64 && getTriple().isOSWindows();
 }
 
-bool Generic_GCC::isPIEDefault() const {
-  return false;
-}
+bool Generic_GCC::isPIEDefault() const { return false; }
 
 bool Generic_GCC::isPICDefaultForced() const {
-  return false;
+  return getArch() == llvm::Triple::x86_64 && getTriple().isOSWindows();
 }
 
 bool Generic_GCC::IsIntegratedAssemblerDefault() const {
@@ -2092,8 +2068,7 @@ void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
       getTriple().getOS() == llvm::Triple::NaCl;
 
   if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
-                         options::OPT_fno_use_init_array,
-                         UseInitArrayDefault))
+                         options::OPT_fno_use_init_array, UseInitArrayDefault))
     CC1Args.push_back("-fuse-init-array");
 }
 
@@ -2119,18 +2094,15 @@ std::string Hexagon_TC::GetGnuDir(const std::string &InstalledDir,
   return InstallRelDir;
 }
 
-const char *Hexagon_TC::GetSmallDataThreshold(const ArgList &Args)
-{
+const char *Hexagon_TC::GetSmallDataThreshold(const ArgList &Args) {
   Arg *A;
 
-  A = Args.getLastArg(options::OPT_G,
-                      options::OPT_G_EQ,
+  A = Args.getLastArg(options::OPT_G, options::OPT_G_EQ,
                       options::OPT_msmall_data_threshold_EQ);
   if (A)
     return A->getValue();
 
-  A = Args.getLastArg(options::OPT_shared,
-                      options::OPT_fpic,
+  A = Args.getLastArg(options::OPT_shared, options::OPT_fpic,
                       options::OPT_fPIC);
   if (A)
     return "0";
@@ -2138,26 +2110,22 @@ const char *Hexagon_TC::GetSmallDataThreshold(const ArgList &Args)
   return 0;
 }
 
-bool Hexagon_TC::UsesG0(const char* smallDataThreshold)
-{
+bool Hexagon_TC::UsesG0(const char *smallDataThreshold) {
   return smallDataThreshold && smallDataThreshold[0] == '0';
 }
 
-static void GetHexagonLibraryPaths(
-  const ArgList &Args,
-  const std::string &Ver,
-  const std::string &MarchString,
-  const std::string &InstalledDir,
-  ToolChain::path_list *LibPaths)
-{
+static void GetHexagonLibraryPaths(const ArgList &Args, const std::string &Ver,
+                                   const std::string &MarchString,
+                                   const std::string &InstalledDir,
+                                   ToolChain::path_list *LibPaths) {
   bool buildingLib = Args.hasArg(options::OPT_shared);
 
   //----------------------------------------------------------------------------
   // -L Args
   //----------------------------------------------------------------------------
-  for (const Arg *A : Args.filtered(options::OPT_L))
-    for (unsigned i = 0, e = A->getNumValues(); i != e; ++i)
-      LibPaths->push_back(A->getValue(i));
+  for (Arg *A : Args.filtered(options::OPT_L))
+    for (const char *Value : A->getValues())
+      LibPaths->push_back(Value);
 
   //----------------------------------------------------------------------------
   // Other standard paths
@@ -2191,7 +2159,7 @@ static void GetHexagonLibraryPaths(
 
 Hexagon_TC::Hexagon_TC(const Driver &D, const llvm::Triple &Triple,
                        const ArgList &Args)
-  : Linux(D, Triple, Args) {
+    : Linux(D, Triple, Args) {
   const std::string InstalledDir(getDriver().getInstalledDir());
   const std::string GnuDir = Hexagon_TC::GetGnuDir(InstalledDir, Args);
 
@@ -2204,7 +2172,7 @@ Hexagon_TC::Hexagon_TC(const Driver &D, const llvm::Triple &Triple,
   // Determine version of GCC libraries and headers to use.
   const std::string HexagonDir(GnuDir + "/lib/gcc/hexagon");
   std::error_code ec;
-  GCCVersion MaxVersion= GCCVersion::Parse("0.0.0");
+  GCCVersion MaxVersion = GCCVersion::Parse("0.0.0");
   for (llvm::sys::fs::directory_iterator di(HexagonDir, ec), de;
        !ec && di != de; di = di.increment(ec)) {
     GCCVersion cv = GCCVersion::Parse(llvm::sys::path::filename(di->path()));
@@ -2213,30 +2181,25 @@ Hexagon_TC::Hexagon_TC(const Driver &D, const llvm::Triple &Triple,
   }
   GCCLibAndIncVersion = MaxVersion;
 
-  ToolChain::path_list *LibPaths= &getFilePaths();
+  ToolChain::path_list *LibPaths = &getFilePaths();
 
   // Remove paths added by Linux toolchain. Currently Hexagon_TC really targets
   // 'elf' OS type, so the Linux paths are not appropriate. When we actually
   // support 'linux' we'll need to fix this up
   LibPaths->clear();
 
-  GetHexagonLibraryPaths(
-    Args,
-    GetGCCLibAndIncVersion(),
-    GetTargetCPU(Args),
-    InstalledDir,
-    LibPaths);
+  GetHexagonLibraryPaths(Args, GetGCCLibAndIncVersion(), GetTargetCPU(Args),
+                         InstalledDir, LibPaths);
 }
 
-Hexagon_TC::~Hexagon_TC() {
-}
+Hexagon_TC::~Hexagon_TC() {}
 
 Tool *Hexagon_TC::buildAssembler() const {
-  return new tools::hexagon::Assemble(*this);
+  return new tools::hexagon::Assembler(*this);
 }
 
 Tool *Hexagon_TC::buildLinker() const {
-  return new tools::hexagon::Link(*this);
+  return new tools::hexagon::Linker(*this);
 }
 
 void Hexagon_TC::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
@@ -2280,8 +2243,7 @@ Hexagon_TC::GetCXXStdlibType(const ArgList &Args) const {
 
   StringRef Value = A->getValue();
   if (Value != "libstdc++") {
-    getDriver().Diag(diag::err_drv_invalid_stdlib_name)
-      << A->getAsString(Args);
+    getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
   }
 
   return ToolChain::CST_Libstdcxx;
@@ -2310,8 +2272,7 @@ static int getHexagonVersion(const ArgList &Args) {
   return 4;
 }
 
-StringRef Hexagon_TC::GetTargetCPU(const ArgList &Args)
-{
+StringRef Hexagon_TC::GetTargetCPU(const ArgList &Args) {
   int V = getHexagonVersion(Args);
   // FIXME: We don't support versions < 4. We should error on them.
   switch (V) {
@@ -2334,13 +2295,13 @@ StringRef Hexagon_TC::GetTargetCPU(const ArgList &Args)
 /// NaCl Toolchain
 NaCl_TC::NaCl_TC(const Driver &D, const llvm::Triple &Triple,
                  const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+    : Generic_ELF(D, Triple, Args) {
 
   // Remove paths added by Generic_GCC. NaCl Toolchain cannot use the
   // default paths, and must instead only use the paths provided
   // with this toolchain based on architecture.
-  path_list& file_paths = getFilePaths();
-  path_list& prog_paths = getProgramPaths();
+  path_list &file_paths = getFilePaths();
+  path_list &prog_paths = getProgramPaths();
 
   file_paths.clear();
   prog_paths.clear();
@@ -2354,30 +2315,30 @@ NaCl_TC::NaCl_TC(const Driver &D, const llvm::Triple &Triple,
   // Path for toolchain libraries (libgcc.a, ...)
   std::string ToolPath(getDriver().ResourceDir + "/lib/");
 
-  switch(Triple.getArch()) {
-    case llvm::Triple::x86: {
-      file_paths.push_back(FilePath + "x86_64-nacl/lib32");
-      file_paths.push_back(FilePath + "x86_64-nacl/usr/lib32");
-      prog_paths.push_back(ProgPath + "x86_64-nacl/bin");
-      file_paths.push_back(ToolPath + "i686-nacl");
-      break;
-    }
-    case llvm::Triple::x86_64: {
-      file_paths.push_back(FilePath + "x86_64-nacl/lib");
-      file_paths.push_back(FilePath + "x86_64-nacl/usr/lib");
-      prog_paths.push_back(ProgPath + "x86_64-nacl/bin");
-      file_paths.push_back(ToolPath + "x86_64-nacl");
-      break;
-    }
-    case llvm::Triple::arm: {
-      file_paths.push_back(FilePath + "arm-nacl/lib");
-      file_paths.push_back(FilePath + "arm-nacl/usr/lib");
-      prog_paths.push_back(ProgPath + "arm-nacl/bin");
-      file_paths.push_back(ToolPath + "arm-nacl");
-      break;
-    }
-    default:
-      break;
+  switch (Triple.getArch()) {
+  case llvm::Triple::x86: {
+    file_paths.push_back(FilePath + "x86_64-nacl/lib32");
+    file_paths.push_back(FilePath + "x86_64-nacl/usr/lib32");
+    prog_paths.push_back(ProgPath + "x86_64-nacl/bin");
+    file_paths.push_back(ToolPath + "i686-nacl");
+    break;
+  }
+  case llvm::Triple::x86_64: {
+    file_paths.push_back(FilePath + "x86_64-nacl/lib");
+    file_paths.push_back(FilePath + "x86_64-nacl/usr/lib");
+    prog_paths.push_back(ProgPath + "x86_64-nacl/bin");
+    file_paths.push_back(ToolPath + "x86_64-nacl");
+    break;
+  }
+  case llvm::Triple::arm: {
+    file_paths.push_back(FilePath + "arm-nacl/lib");
+    file_paths.push_back(FilePath + "arm-nacl/usr/lib");
+    prog_paths.push_back(ProgPath + "arm-nacl/bin");
+    file_paths.push_back(ToolPath + "arm-nacl");
+    break;
+  }
+  default:
+    break;
   }
 
   // Use provided linker, not system linker
@@ -2401,13 +2362,17 @@ void NaCl_TC::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
     return;
 
   SmallString<128> P(D.Dir + "/../");
-  if (getTriple().getArch() == llvm::Triple::arm) {
+  switch (getTriple().getArch()) {
+  case llvm::Triple::arm:
     llvm::sys::path::append(P, "arm-nacl/usr/include");
-  } else if (getTriple().getArch() == llvm::Triple::x86) {
+    break;
+  case llvm::Triple::x86:
     llvm::sys::path::append(P, "x86_64-nacl/usr/include");
-  } else if (getTriple().getArch() == llvm::Triple::x86_64) {
+    break;
+  case llvm::Triple::x86_64:
     llvm::sys::path::append(P, "x86_64-nacl/usr/include");
-  } else {
+    break;
+  default:
     return;
   }
 
@@ -2437,18 +2402,22 @@ void NaCl_TC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
   // if the value is libc++, and emits an error for other values.
   GetCXXStdlibType(DriverArgs);
 
-  if (getTriple().getArch() == llvm::Triple::arm) {
-    SmallString<128> P(D.Dir + "/../");
+  SmallString<128> P(D.Dir + "/../");
+  switch (getTriple().getArch()) {
+  case llvm::Triple::arm:
     llvm::sys::path::append(P, "arm-nacl/include/c++/v1");
     addSystemInclude(DriverArgs, CC1Args, P.str());
-  } else if (getTriple().getArch() == llvm::Triple::x86) {
-    SmallString<128> P(D.Dir + "/../");
+    break;
+  case llvm::Triple::x86:
     llvm::sys::path::append(P, "x86_64-nacl/include/c++/v1");
     addSystemInclude(DriverArgs, CC1Args, P.str());
-  } else if (getTriple().getArch() == llvm::Triple::x86_64) {
-    SmallString<128> P(D.Dir + "/../");
+    break;
+  case llvm::Triple::x86_64:
     llvm::sys::path::append(P, "x86_64-nacl/include/c++/v1");
     addSystemInclude(DriverArgs, CC1Args, P.str());
+    break;
+  default:
+    break;
   }
 }
 
@@ -2457,15 +2426,14 @@ ToolChain::CXXStdlibType NaCl_TC::GetCXXStdlibType(const ArgList &Args) const {
     StringRef Value = A->getValue();
     if (Value == "libc++")
       return ToolChain::CST_Libcxx;
-    getDriver().Diag(diag::err_drv_invalid_stdlib_name)
-      << A->getAsString(Args);
+    getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
   }
 
   return ToolChain::CST_Libcxx;
 }
 
-std::string NaCl_TC::ComputeEffectiveClangTriple(
-    const ArgList &Args, types::ID InputType) const {
+std::string NaCl_TC::ComputeEffectiveClangTriple(const ArgList &Args,
+                                                 types::ID InputType) const {
   llvm::Triple TheTriple(ComputeLLVMTriple(Args, InputType));
   if (TheTriple.getArch() == llvm::Triple::arm &&
       TheTriple.getEnvironment() == llvm::Triple::UnknownEnvironment)
@@ -2474,13 +2442,13 @@ std::string NaCl_TC::ComputeEffectiveClangTriple(
 }
 
 Tool *NaCl_TC::buildLinker() const {
-  return new tools::nacltools::Link(*this);
+  return new tools::nacltools::Linker(*this);
 }
 
 Tool *NaCl_TC::buildAssembler() const {
   if (getTriple().getArch() == llvm::Triple::arm)
-    return new tools::nacltools::AssembleARM(*this);
-  return new tools::gnutools::Assemble(*this);
+    return new tools::nacltools::AssemblerARM(*this);
+  return new tools::gnutools::Assembler(*this);
 }
 // End NaCl
 
@@ -2488,9 +2456,9 @@ Tool *NaCl_TC::buildAssembler() const {
 /// all subcommands. See http://tce.cs.tut.fi for our peculiar target.
 /// Currently does not support anything else but compilation.
 
-TCEToolChain::TCEToolChain(const Driver &D, const llvm::Triple& Triple,
+TCEToolChain::TCEToolChain(const Driver &D, const llvm::Triple &Triple,
                            const ArgList &Args)
-  : ToolChain(D, Triple, Args) {
+    : ToolChain(D, Triple, Args) {
   // Path mangling to find libexec
   std::string Path(getDriver().Dir);
 
@@ -2498,24 +2466,15 @@ TCEToolChain::TCEToolChain(const Driver &D, const llvm::Triple& Triple,
   getProgramPaths().push_back(Path);
 }
 
-TCEToolChain::~TCEToolChain() {
-}
+TCEToolChain::~TCEToolChain() {}
 
-bool TCEToolChain::IsMathErrnoDefault() const {
-  return true;
-}
+bool TCEToolChain::IsMathErrnoDefault() const { return true; }
 
-bool TCEToolChain::isPICDefault() const {
-  return false;
-}
+bool TCEToolChain::isPICDefault() const { return false; }
 
-bool TCEToolChain::isPIEDefault() const {
-  return false;
-}
+bool TCEToolChain::isPIEDefault() const { return false; }
 
-bool TCEToolChain::isPICDefaultForced() const {
-  return false;
-}
+bool TCEToolChain::isPICDefaultForced() const { return false; }
 
 // CloudABI - CloudABI tool chain which can call ld(1) directly.
 
@@ -2545,42 +2504,40 @@ void CloudABI::AddCXXStdlibLibArgs(const ArgList &Args,
   CmdArgs.push_back("-lunwind");
 }
 
-Tool *CloudABI::buildLinker() const { return new tools::cloudabi::Link(*this); }
+Tool *CloudABI::buildLinker() const {
+  return new tools::cloudabi::Linker(*this);
+}
 
 /// OpenBSD - OpenBSD tool chain which can call as(1) and ld(1) directly.
 
-OpenBSD::OpenBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple,
+                 const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
   getFilePaths().push_back(getDriver().Dir + "/../lib");
   getFilePaths().push_back("/usr/lib");
 }
 
 Tool *OpenBSD::buildAssembler() const {
-  return new tools::openbsd::Assemble(*this);
+  return new tools::openbsd::Assembler(*this);
 }
 
-Tool *OpenBSD::buildLinker() const {
-  return new tools::openbsd::Link(*this);
-}
+Tool *OpenBSD::buildLinker() const { return new tools::openbsd::Linker(*this); }
 
 /// Bitrig - Bitrig tool chain which can call as(1) and ld(1) directly.
 
-Bitrig::Bitrig(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+Bitrig::Bitrig(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
   getFilePaths().push_back(getDriver().Dir + "/../lib");
   getFilePaths().push_back("/usr/lib");
 }
 
 Tool *Bitrig::buildAssembler() const {
-  return new tools::bitrig::Assemble(*this);
+  return new tools::bitrig::Assembler(*this);
 }
 
-Tool *Bitrig::buildLinker() const {
-  return new tools::bitrig::Link(*this);
-}
+Tool *Bitrig::buildLinker() const { return new tools::bitrig::Linker(*this); }
 
-ToolChain::CXXStdlibType
-Bitrig::GetCXXStdlibType(const ArgList &Args) const {
+ToolChain::CXXStdlibType Bitrig::GetCXXStdlibType(const ArgList &Args) const {
   if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
     StringRef Value = A->getValue();
     if (Value == "libstdc++")
@@ -2588,8 +2545,7 @@ Bitrig::GetCXXStdlibType(const ArgList &Args) const {
     if (Value == "libc++")
       return ToolChain::CST_Libcxx;
 
-    getDriver().Diag(diag::err_drv_invalid_stdlib_name)
-      << A->getAsString(Args);
+    getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
   }
   return ToolChain::CST_Libcxx;
 }
@@ -2615,11 +2571,11 @@ void Bitrig::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
     if (Triple.startswith("amd64"))
       addSystemInclude(DriverArgs, CC1Args,
                        getDriver().SysRoot + "/usr/include/c++/stdc++/x86_64" +
-                       Triple.substr(5));
+                           Triple.substr(5));
     else
-      addSystemInclude(DriverArgs, CC1Args,
-                       getDriver().SysRoot + "/usr/include/c++/stdc++/" +
-                       Triple);
+      addSystemInclude(DriverArgs, CC1Args, getDriver().SysRoot +
+                                                "/usr/include/c++/stdc++/" +
+                                                Triple);
     break;
   }
 }
@@ -2640,8 +2596,9 @@ void Bitrig::AddCXXStdlibLibArgs(const ArgList &Args,
 
 /// FreeBSD - FreeBSD tool chain which can call as(1) and ld(1) directly.
 
-FreeBSD::FreeBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+FreeBSD::FreeBSD(const Driver &D, const llvm::Triple &Triple,
+                 const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
 
   // When targeting 32-bit platforms, look for '/usr/lib32/crt1.o' and fall
   // back to '/usr/lib' if it doesn't exist.
@@ -2653,8 +2610,7 @@ FreeBSD::FreeBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Arg
     getFilePaths().push_back(getDriver().SysRoot + "/usr/lib");
 }
 
-ToolChain::CXXStdlibType
-FreeBSD::GetCXXStdlibType(const ArgList &Args) const {
+ToolChain::CXXStdlibType FreeBSD::GetCXXStdlibType(const ArgList &Args) const {
   if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
     StringRef Value = A->getValue();
     if (Value == "libstdc++")
@@ -2662,10 +2618,9 @@ FreeBSD::GetCXXStdlibType(const ArgList &Args) const {
     if (Value == "libc++")
       return ToolChain::CST_Libcxx;
 
-    getDriver().Diag(diag::err_drv_invalid_stdlib_name)
-      << A->getAsString(Args);
+    getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
   }
-  if (getTriple().getOSMajorVersion() >= 10) 
+  if (getTriple().getOSMajorVersion() >= 10)
     return ToolChain::CST_Libcxx;
   return ToolChain::CST_Libstdcxx;
 }
@@ -2691,12 +2646,10 @@ void FreeBSD::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
 }
 
 Tool *FreeBSD::buildAssembler() const {
-  return new tools::freebsd::Assemble(*this);
+  return new tools::freebsd::Assembler(*this);
 }
 
-Tool *FreeBSD::buildLinker() const {
-  return new tools::freebsd::Link(*this);
-}
+Tool *FreeBSD::buildLinker() const { return new tools::freebsd::Linker(*this); }
 
 bool FreeBSD::UseSjLjExceptions() const {
   // FreeBSD uses SjLj exceptions on ARM oabi.
@@ -2712,13 +2665,9 @@ bool FreeBSD::UseSjLjExceptions() const {
   }
 }
 
-bool FreeBSD::HasNativeLLVMSupport() const {
-  return true;
-}
+bool FreeBSD::HasNativeLLVMSupport() const { return true; }
 
-bool FreeBSD::isPIEDefault() const {
-  return getSanitizerArgs().requiresPIE();
-}
+bool FreeBSD::isPIEDefault() const { return getSanitizerArgs().requiresPIE(); }
 
 SanitizerMask FreeBSD::getSupportedSanitizers() const {
   const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
@@ -2740,8 +2689,8 @@ SanitizerMask FreeBSD::getSupportedSanitizers() const {
 
 /// NetBSD - NetBSD tool chain which can call as(1) and ld(1) directly.
 
-NetBSD::NetBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+NetBSD::NetBSD(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
 
   if (getDriver().UseStdLib) {
     // When targeting a 32-bit platform, try the special directory used on
@@ -2793,15 +2742,12 @@ NetBSD::NetBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
 }
 
 Tool *NetBSD::buildAssembler() const {
-  return new tools::netbsd::Assemble(*this);
+  return new tools::netbsd::Assembler(*this);
 }
 
-Tool *NetBSD::buildLinker() const {
-  return new tools::netbsd::Link(*this);
-}
+Tool *NetBSD::buildLinker() const { return new tools::netbsd::Linker(*this); }
 
-ToolChain::CXXStdlibType
-NetBSD::GetCXXStdlibType(const ArgList &Args) const {
+ToolChain::CXXStdlibType NetBSD::GetCXXStdlibType(const ArgList &Args) const {
   if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
     StringRef Value = A->getValue();
     if (Value == "libstdc++")
@@ -2809,8 +2755,7 @@ NetBSD::GetCXXStdlibType(const ArgList &Args) const {
     if (Value == "libc++")
       return ToolChain::CST_Libcxx;
 
-    getDriver().Diag(diag::err_drv_invalid_stdlib_name)
-      << A->getAsString(Args);
+    getDriver().Diag(diag::err_drv_invalid_stdlib_name) << A->getAsString(Args);
   }
 
   unsigned Major, Minor, Micro;
@@ -2857,25 +2802,23 @@ void NetBSD::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
 
 /// Minix - Minix tool chain which can call as(1) and ld(1) directly.
 
-Minix::Minix(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+Minix::Minix(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
   getFilePaths().push_back(getDriver().Dir + "/../lib");
   getFilePaths().push_back("/usr/lib");
 }
 
 Tool *Minix::buildAssembler() const {
-  return new tools::minix::Assemble(*this);
+  return new tools::minix::Assembler(*this);
 }
 
-Tool *Minix::buildLinker() const {
-  return new tools::minix::Link(*this);
-}
+Tool *Minix::buildLinker() const { return new tools::minix::Linker(*this); }
 
 /// Solaris - Solaris tool chain which can call as(1) and ld(1) directly.
 
-Solaris::Solaris(const Driver &D, const llvm::Triple& Triple,
+Solaris::Solaris(const Driver &D, const llvm::Triple &Triple,
                  const ArgList &Args)
-  : Generic_GCC(D, Triple, Args) {
+    : Generic_GCC(D, Triple, Args) {
 
   getProgramPaths().push_back(getDriver().getInstalledDir());
   if (getDriver().getInstalledDir() != getDriver().Dir)
@@ -2886,16 +2829,17 @@ Solaris::Solaris(const Driver &D, const llvm::Triple& Triple,
 }
 
 Tool *Solaris::buildAssembler() const {
-  return new tools::solaris::Assemble(*this);
+  return new tools::solaris::Assembler(*this);
 }
 
-Tool *Solaris::buildLinker() const {
-  return new tools::solaris::Link(*this);
-}
+Tool *Solaris::buildLinker() const { return new tools::solaris::Linker(*this); }
 
 /// Distribution (very bare-bones at the moment).
 
 enum Distro {
+  // NB: Releases of a particular Linux distro should be kept together
+  // in this enum, because some tests are done by integer comparison against
+  // the first and last known member in the family, e.g. IsRedHat().
   ArchLinux,
   DebianLenny,
   DebianSqueeze,
@@ -2931,9 +2875,7 @@ static bool IsRedhat(enum Distro Distro) {
   return Distro == Fedora || (Distro >= RHEL4 && Distro <= RHEL7);
 }
 
-static bool IsOpenSUSE(enum Distro Distro) {
-  return Distro == OpenSUSE;
-}
+static bool IsOpenSUSE(enum Distro Distro) { return Distro == OpenSUSE; }
 
 static bool IsDebian(enum Distro Distro) {
   return Distro >= DebianLenny && Distro <= DebianStretch;
@@ -2951,25 +2893,25 @@ static Distro DetectDistro(llvm::Triple::ArchType Arch) {
     SmallVector<StringRef, 16> Lines;
     Data.split(Lines, "\n");
     Distro Version = UnknownDistro;
-    for (unsigned i = 0, s = Lines.size(); i != s; ++i)
-      if (Version == UnknownDistro && Lines[i].startswith("DISTRIB_CODENAME="))
-        Version = llvm::StringSwitch<Distro>(Lines[i].substr(17))
-          .Case("hardy", UbuntuHardy)
-          .Case("intrepid", UbuntuIntrepid)
-          .Case("jaunty", UbuntuJaunty)
-          .Case("karmic", UbuntuKarmic)
-          .Case("lucid", UbuntuLucid)
-          .Case("maverick", UbuntuMaverick)
-          .Case("natty", UbuntuNatty)
-          .Case("oneiric", UbuntuOneiric)
-          .Case("precise", UbuntuPrecise)
-          .Case("quantal", UbuntuQuantal)
-          .Case("raring", UbuntuRaring)
-          .Case("saucy", UbuntuSaucy)
-          .Case("trusty", UbuntuTrusty)
-          .Case("utopic", UbuntuUtopic)
-          .Case("vivid", UbuntuVivid)
-          .Default(UnknownDistro);
+    for (const StringRef Line : Lines)
+      if (Version == UnknownDistro && Line.startswith("DISTRIB_CODENAME="))
+        Version = llvm::StringSwitch<Distro>(Line.substr(17))
+                      .Case("hardy", UbuntuHardy)
+                      .Case("intrepid", UbuntuIntrepid)
+                      .Case("jaunty", UbuntuJaunty)
+                      .Case("karmic", UbuntuKarmic)
+                      .Case("lucid", UbuntuLucid)
+                      .Case("maverick", UbuntuMaverick)
+                      .Case("natty", UbuntuNatty)
+                      .Case("oneiric", UbuntuOneiric)
+                      .Case("precise", UbuntuPrecise)
+                      .Case("quantal", UbuntuQuantal)
+                      .Case("raring", UbuntuRaring)
+                      .Case("saucy", UbuntuSaucy)
+                      .Case("trusty", UbuntuTrusty)
+                      .Case("utopic", UbuntuUtopic)
+                      .Case("vivid", UbuntuVivid)
+                      .Default(UnknownDistro);
     return Version;
   }
 
@@ -2999,9 +2941,9 @@ static Distro DetectDistro(llvm::Triple::ArchType Arch) {
       return DebianLenny;
     else if (Data.startswith("squeeze/sid") || Data[0] == '6')
       return DebianSqueeze;
-    else if (Data.startswith("wheezy/sid")  || Data[0] == '7')
+    else if (Data.startswith("wheezy/sid") || Data[0] == '7')
       return DebianWheezy;
-    else if (Data.startswith("jessie/sid")  || Data[0] == '8')
+    else if (Data.startswith("jessie/sid") || Data[0] == '8')
       return DebianJessie;
     else if (Data.startswith("stretch/sid") || Data[0] == '9')
       return DebianStretch;
@@ -3028,100 +2970,105 @@ static Distro DetectDistro(llvm::Triple::ArchType Arch) {
 /// so we provide a rough mapping here.
 static std::string getMultiarchTriple(const llvm::Triple &TargetTriple,
                                       StringRef SysRoot) {
+  llvm::Triple::EnvironmentType TargetEnvironment = TargetTriple.getEnvironment();
+
   // For most architectures, just use whatever we have rather than trying to be
   // clever.
   switch (TargetTriple.getArch()) {
   default:
-    return TargetTriple.str();
+    break;
 
-    // We use the existence of '/lib/<triple>' as a directory to detect some
-    // common linux triples that don't quite match the Clang triple for both
-    // 32-bit and 64-bit targets. Multiarch fixes its install triples to these
-    // regardless of what the actual target triple is.
+  // We use the existence of '/lib/<triple>' as a directory to detect some
+  // common linux triples that don't quite match the Clang triple for both
+  // 32-bit and 64-bit targets. Multiarch fixes its install triples to these
+  // regardless of what the actual target triple is.
   case llvm::Triple::arm:
   case llvm::Triple::thumb:
-    if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
+    if (TargetEnvironment == llvm::Triple::GNUEABIHF) {
       if (llvm::sys::fs::exists(SysRoot + "/lib/arm-linux-gnueabihf"))
         return "arm-linux-gnueabihf";
     } else {
       if (llvm::sys::fs::exists(SysRoot + "/lib/arm-linux-gnueabi"))
         return "arm-linux-gnueabi";
     }
-    return TargetTriple.str();
+    break;
   case llvm::Triple::armeb:
   case llvm::Triple::thumbeb:
-    if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
+    if (TargetEnvironment == llvm::Triple::GNUEABIHF) {
       if (llvm::sys::fs::exists(SysRoot + "/lib/armeb-linux-gnueabihf"))
         return "armeb-linux-gnueabihf";
     } else {
       if (llvm::sys::fs::exists(SysRoot + "/lib/armeb-linux-gnueabi"))
         return "armeb-linux-gnueabi";
     }
-    return TargetTriple.str();
+    break;
   case llvm::Triple::x86:
     if (llvm::sys::fs::exists(SysRoot + "/lib/i386-linux-gnu"))
       return "i386-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::x86_64:
     // We don't want this for x32, otherwise it will match x86_64 libs
-    if (TargetTriple.getEnvironment() != llvm::Triple::GNUX32 &&
+    if (TargetEnvironment != llvm::Triple::GNUX32 &&
         llvm::sys::fs::exists(SysRoot + "/lib/x86_64-linux-gnu"))
       return "x86_64-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::aarch64:
     if (llvm::sys::fs::exists(SysRoot + "/lib/aarch64-linux-gnu"))
       return "aarch64-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::aarch64_be:
     if (llvm::sys::fs::exists(SysRoot + "/lib/aarch64_be-linux-gnu"))
       return "aarch64_be-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::mips:
     if (llvm::sys::fs::exists(SysRoot + "/lib/mips-linux-gnu"))
       return "mips-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::mipsel:
     if (llvm::sys::fs::exists(SysRoot + "/lib/mipsel-linux-gnu"))
       return "mipsel-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::mips64:
     if (llvm::sys::fs::exists(SysRoot + "/lib/mips64-linux-gnu"))
       return "mips64-linux-gnu";
     if (llvm::sys::fs::exists(SysRoot + "/lib/mips64-linux-gnuabi64"))
       return "mips64-linux-gnuabi64";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::mips64el:
     if (llvm::sys::fs::exists(SysRoot + "/lib/mips64el-linux-gnu"))
       return "mips64el-linux-gnu";
     if (llvm::sys::fs::exists(SysRoot + "/lib/mips64el-linux-gnuabi64"))
       return "mips64el-linux-gnuabi64";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::ppc:
     if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc-linux-gnuspe"))
       return "powerpc-linux-gnuspe";
     if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc-linux-gnu"))
       return "powerpc-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::ppc64:
     if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc64-linux-gnu"))
       return "powerpc64-linux-gnu";
+    break;
   case llvm::Triple::ppc64le:
     if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc64le-linux-gnu"))
       return "powerpc64le-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::sparc:
     if (llvm::sys::fs::exists(SysRoot + "/lib/sparc-linux-gnu"))
       return "sparc-linux-gnu";
-    return TargetTriple.str();
+    break;
   case llvm::Triple::sparcv9:
     if (llvm::sys::fs::exists(SysRoot + "/lib/sparc64-linux-gnu"))
       return "sparc64-linux-gnu";
-    return TargetTriple.str();
+    break;
   }
+  return TargetTriple.str();
 }
 
 static void addPathIfExists(Twine Path, ToolChain::path_list &Paths) {
-  if (llvm::sys::fs::exists(Path)) Paths.push_back(Path.str());
+  if (llvm::sys::fs::exists(Path))
+    Paths.push_back(Path.str());
 }
 
 static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) {
@@ -3155,7 +3102,7 @@ static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) {
 }
 
 Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+    : Generic_ELF(D, Triple, Args) {
   GCCInstallation.init(D, Triple, Args);
   Multilibs = GCCInstallation.getMultilibs();
   llvm::Triple::ArchType Arch = Triple.getArch();
@@ -3171,7 +3118,8 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
   // FIXME: This seems unlikely to be Linux-specific.
   ToolChain::path_list &PPaths = getProgramPaths();
   PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../" +
-                         GCCInstallation.getTriple().str() + "/bin").str());
+                         GCCInstallation.getTriple().str() + "/bin")
+                       .str());
 
   Linker = GetLinkerPath();
 
@@ -3235,8 +3183,7 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
 
     // Sourcery CodeBench MIPS toolchain holds some libraries under
     // a biarch-like suffix of the GCC installation.
-    addPathIfExists((GCCInstallation.getInstallPath() +
-                     Multilib.gccSuffix()),
+    addPathIfExists((GCCInstallation.getInstallPath() + Multilib.gccSuffix()),
                     Paths);
 
     // GCC cross compiling toolchains will install target libraries which ship
@@ -3258,7 +3205,7 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     // Note that this matches the GCC behavior. See the below comment for where
     // Clang diverges from GCC's behavior.
     addPathIfExists(LibPath + "/../" + GCCTriple.str() + "/lib/../" + OSLibDir +
-                    Multilib.osSuffix(),
+                        Multilib.osSuffix(),
                     Paths);
 
     // If the GCC installation we found is inside of the sysroot, we want to
@@ -3295,13 +3242,14 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
   // installations with strange symlinks.
   if (GCCInstallation.isValid()) {
     addPathIfExists(SysRoot + "/usr/lib/" + GCCInstallation.getTriple().str() +
-                    "/../../" + OSLibDir, Paths);
+                        "/../../" + OSLibDir,
+                    Paths);
 
     // Add the 'other' biarch variant path
     Multilib BiarchSibling;
     if (GCCInstallation.getBiarchSibling(BiarchSibling)) {
-      addPathIfExists(GCCInstallation.getInstallPath() +
-                      BiarchSibling.gccSuffix(), Paths);
+      addPathIfExists(
+          GCCInstallation.getInstallPath() + BiarchSibling.gccSuffix(), Paths);
     }
 
     // See comments above on the multilib variant for details of why this is
@@ -3309,8 +3257,9 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     const std::string &LibPath = GCCInstallation.getParentLibPath();
     const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
     const Multilib &Multilib = GCCInstallation.getMultilib();
-    addPathIfExists(LibPath + "/../" + GCCTriple.str() +
-                    "/lib" + Multilib.osSuffix(), Paths);
+    addPathIfExists(LibPath + "/../" + GCCTriple.str() + "/lib" +
+                        Multilib.osSuffix(),
+                    Paths);
 
     // See comments above on the multilib variant for details of why this is
     // only included from within the sysroot.
@@ -3330,16 +3279,12 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
   addPathIfExists(SysRoot + "/usr/lib", Paths);
 }
 
-bool Linux::HasNativeLLVMSupport() const {
-  return true;
-}
+bool Linux::HasNativeLLVMSupport() const { return true; }
 
-Tool *Linux::buildLinker() const {
-  return new tools::gnutools::Link(*this);
-}
+Tool *Linux::buildLinker() const { return new tools::gnutools::Linker(*this); }
 
 Tool *Linux::buildAssembler() const {
-  return new tools::gnutools::Assemble(*this);
+  return new tools::gnutools::Assembler(*this);
 }
 
 std::string Linux::computeSysRoot() const {
@@ -3357,8 +3302,9 @@ std::string Linux::computeSysRoot() const {
   const StringRef TripleStr = GCCInstallation.getTriple().str();
   const Multilib &Multilib = GCCInstallation.getMultilib();
 
-  std::string Path = (InstallDir + "/../../../../" + TripleStr + "/libc" +
-                      Multilib.osSuffix()).str();
+  std::string Path =
+      (InstallDir + "/../../../../" + TripleStr + "/libc" + Multilib.osSuffix())
+          .str();
 
   if (llvm::sys::fs::exists(Path))
     return Path;
@@ -3421,91 +3367,91 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
 
   // Implement generic Debian multiarch support.
   const StringRef X86_64MultiarchIncludeDirs[] = {
-    "/usr/include/x86_64-linux-gnu",
+      "/usr/include/x86_64-linux-gnu",
 
-    // FIXME: These are older forms of multiarch. It's not clear that they're
-    // in use in any released version of Debian, so we should consider
-    // removing them.
-    "/usr/include/i686-linux-gnu/64", "/usr/include/i486-linux-gnu/64"
-  };
+      // FIXME: These are older forms of multiarch. It's not clear that they're
+      // in use in any released version of Debian, so we should consider
+      // removing them.
+      "/usr/include/i686-linux-gnu/64", "/usr/include/i486-linux-gnu/64"};
   const StringRef X86MultiarchIncludeDirs[] = {
-    "/usr/include/i386-linux-gnu",
+      "/usr/include/i386-linux-gnu",
 
-    // FIXME: These are older forms of multiarch. It's not clear that they're
-    // in use in any released version of Debian, so we should consider
-    // removing them.
-    "/usr/include/x86_64-linux-gnu/32", "/usr/include/i686-linux-gnu",
-    "/usr/include/i486-linux-gnu"
-  };
+      // FIXME: These are older forms of multiarch. It's not clear that they're
+      // in use in any released version of Debian, so we should consider
+      // removing them.
+      "/usr/include/x86_64-linux-gnu/32", "/usr/include/i686-linux-gnu",
+      "/usr/include/i486-linux-gnu"};
   const StringRef AArch64MultiarchIncludeDirs[] = {
-    "/usr/include/aarch64-linux-gnu"
-  };
+      "/usr/include/aarch64-linux-gnu"};
   const StringRef ARMMultiarchIncludeDirs[] = {
-    "/usr/include/arm-linux-gnueabi"
-  };
+      "/usr/include/arm-linux-gnueabi"};
   const StringRef ARMHFMultiarchIncludeDirs[] = {
-    "/usr/include/arm-linux-gnueabihf"
-  };
-  const StringRef MIPSMultiarchIncludeDirs[] = {
-    "/usr/include/mips-linux-gnu"
-  };
+      "/usr/include/arm-linux-gnueabihf"};
+  const StringRef MIPSMultiarchIncludeDirs[] = {"/usr/include/mips-linux-gnu"};
   const StringRef MIPSELMultiarchIncludeDirs[] = {
-    "/usr/include/mipsel-linux-gnu"
-  };
+      "/usr/include/mipsel-linux-gnu"};
   const StringRef MIPS64MultiarchIncludeDirs[] = {
-    "/usr/include/mips64-linux-gnu",
-    "/usr/include/mips64-linux-gnuabi64"
-  };
+      "/usr/include/mips64-linux-gnu", "/usr/include/mips64-linux-gnuabi64"};
   const StringRef MIPS64ELMultiarchIncludeDirs[] = {
-    "/usr/include/mips64el-linux-gnu",
-    "/usr/include/mips64el-linux-gnuabi64"
-  };
+      "/usr/include/mips64el-linux-gnu",
+      "/usr/include/mips64el-linux-gnuabi64"};
   const StringRef PPCMultiarchIncludeDirs[] = {
-    "/usr/include/powerpc-linux-gnu"
-  };
+      "/usr/include/powerpc-linux-gnu"};
   const StringRef PPC64MultiarchIncludeDirs[] = {
-    "/usr/include/powerpc64-linux-gnu"
-  };
+      "/usr/include/powerpc64-linux-gnu"};
   const StringRef PPC64LEMultiarchIncludeDirs[] = {
-    "/usr/include/powerpc64le-linux-gnu"
-  };
+      "/usr/include/powerpc64le-linux-gnu"};
   const StringRef SparcMultiarchIncludeDirs[] = {
-    "/usr/include/sparc-linux-gnu"
-  };
+      "/usr/include/sparc-linux-gnu"};
   const StringRef Sparc64MultiarchIncludeDirs[] = {
-    "/usr/include/sparc64-linux-gnu"
-  };
+      "/usr/include/sparc64-linux-gnu"};
   ArrayRef<StringRef> MultiarchIncludeDirs;
-  if (getTriple().getArch() == llvm::Triple::x86_64) {
+  switch (getTriple().getArch()) {
+  case llvm::Triple::x86_64:
     MultiarchIncludeDirs = X86_64MultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::x86) {
+    break;
+  case llvm::Triple::x86:
     MultiarchIncludeDirs = X86MultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::aarch64 ||
-             getTriple().getArch() == llvm::Triple::aarch64_be) {
+    break;
+  case llvm::Triple::aarch64:
+  case llvm::Triple::aarch64_be:
     MultiarchIncludeDirs = AArch64MultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::arm) {
+    break;
+  case llvm::Triple::arm:
     if (getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
       MultiarchIncludeDirs = ARMHFMultiarchIncludeDirs;
     else
       MultiarchIncludeDirs = ARMMultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::mips) {
+    break;
+  case llvm::Triple::mips:
     MultiarchIncludeDirs = MIPSMultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::mipsel) {
+    break;
+  case llvm::Triple::mipsel:
     MultiarchIncludeDirs = MIPSELMultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::mips64) {
+    break;
+  case llvm::Triple::mips64:
     MultiarchIncludeDirs = MIPS64MultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::mips64el) {
+    break;
+  case llvm::Triple::mips64el:
     MultiarchIncludeDirs = MIPS64ELMultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::ppc) {
+    break;
+  case llvm::Triple::ppc:
     MultiarchIncludeDirs = PPCMultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::ppc64) {
+    break;
+  case llvm::Triple::ppc64:
     MultiarchIncludeDirs = PPC64MultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::ppc64le) {
+    break;
+  case llvm::Triple::ppc64le:
     MultiarchIncludeDirs = PPC64LEMultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::sparc) {
+    break;
+  case llvm::Triple::sparc:
     MultiarchIncludeDirs = SparcMultiarchIncludeDirs;
-  } else if (getTriple().getArch() == llvm::Triple::sparcv9) {
+    break;
+  case llvm::Triple::sparcv9:
     MultiarchIncludeDirs = Sparc64MultiarchIncludeDirs;
+    break;
+  default:
+    break;
   }
   for (StringRef Dir : MultiarchIncludeDirs) {
     if (llvm::sys::fs::exists(SysRoot + Dir)) {
@@ -3526,13 +3472,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
 }
 
 /// \brief Helper to add the variant paths of a libstdc++ installation.
-/*static*/ bool Linux::addLibStdCXXIncludePaths(Twine Base, Twine Suffix,
-                                                StringRef GCCTriple,
-                                                StringRef GCCMultiarchTriple,
-                                                StringRef TargetMultiarchTriple,
-                                                Twine IncludeSuffix,
-                                                const ArgList &DriverArgs,
-                                                ArgStringList &CC1Args) {
+/*static*/ bool Linux::addLibStdCXXIncludePaths(
+    Twine Base, Twine Suffix, StringRef GCCTriple, StringRef GCCMultiarchTriple,
+    StringRef TargetMultiarchTriple, Twine IncludeSuffix,
+    const ArgList &DriverArgs, ArgStringList &CC1Args) {
   if (!llvm::sys::fs::exists(Base + Suffix))
     return false;
 
@@ -3570,15 +3513,15 @@ void Linux::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
   // Check if libc++ has been enabled and provide its include paths if so.
   if (GetCXXStdlibType(DriverArgs) == ToolChain::CST_Libcxx) {
     const std::string LibCXXIncludePathCandidates[] = {
-      // The primary location is within the Clang installation.
-      // FIXME: We shouldn't hard code 'v1' here to make Clang future proof to
-      // newer ABI versions.
-      getDriver().Dir + "/../include/c++/v1",
-
-      // We also check the system as for a long time this is the only place Clang looked.
-      // FIXME: We should really remove this. It doesn't make any sense.
-      getDriver().SysRoot + "/usr/include/c++/v1"
-    };
+        // The primary location is within the Clang installation.
+        // FIXME: We shouldn't hard code 'v1' here to make Clang future proof to
+        // newer ABI versions.
+        getDriver().Dir + "/../include/c++/v1",
+
+        // We also check the system as for a long time this is the only place
+        // Clang looked.
+        // FIXME: We should really remove this. It doesn't make any sense.
+        getDriver().SysRoot + "/usr/include/c++/v1"};
     for (const auto &IncludePath : LibCXXIncludePathCandidates) {
       if (!llvm::sys::fs::exists(IncludePath))
         continue;
@@ -3609,24 +3552,24 @@ void Linux::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
 
   // The primary search for libstdc++ supports multiarch variants.
   if (addLibStdCXXIncludePaths(LibDir.str() + "/../include",
-                               "/c++/" + Version.Text, TripleStr, GCCMultiarchTriple,
-                               TargetMultiarchTriple,
+                               "/c++/" + Version.Text, TripleStr,
+                               GCCMultiarchTriple, TargetMultiarchTriple,
                                Multilib.includeSuffix(), DriverArgs, CC1Args))
     return;
 
   // Otherwise, fall back on a bunch of options which don't use multiarch
   // layouts for simplicity.
   const std::string LibStdCXXIncludePathCandidates[] = {
-    // Gentoo is weird and places its headers inside the GCC install, so if the
-    // first attempt to find the headers fails, try these patterns.
-    InstallDir.str() + "/include/g++-v" + Version.MajorStr + "." +
-        Version.MinorStr,
-    InstallDir.str() + "/include/g++-v" + Version.MajorStr,
-    // Android standalone toolchain has C++ headers in yet another place.
-    LibDir.str() + "/../" + TripleStr.str() + "/include/c++/" + Version.Text,
-    // Freescale SDK C++ headers are directly in <sysroot>/usr/include/c++,
-    // without a subdirectory corresponding to the gcc version.
-    LibDir.str() + "/../include/c++",
+      // Gentoo is weird and places its headers inside the GCC install,
+      // so if the first attempt to find the headers fails, try these patterns.
+      InstallDir.str() + "/include/g++-v" + Version.MajorStr + "." +
+          Version.MinorStr,
+      InstallDir.str() + "/include/g++-v" + Version.MajorStr,
+      // Android standalone toolchain has C++ headers in yet another place.
+      LibDir.str() + "/../" + TripleStr.str() + "/include/c++/" + Version.Text,
+      // Freescale SDK C++ headers are directly in <sysroot>/usr/include/c++,
+      // without a subdirectory corresponding to the gcc version.
+      LibDir.str() + "/../include/c++",
   };
 
   for (const auto &IncludePath : LibStdCXXIncludePathCandidates) {
@@ -3638,15 +3581,15 @@ void Linux::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
   }
 }
 
-bool Linux::isPIEDefault() const {
-  return getSanitizerArgs().requiresPIE();
-}
+bool Linux::isPIEDefault() const { return getSanitizerArgs().requiresPIE(); }
 
 SanitizerMask Linux::getSupportedSanitizers() const {
   const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
   const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
   const bool IsMIPS64 = getTriple().getArch() == llvm::Triple::mips64 ||
                         getTriple().getArch() == llvm::Triple::mips64el;
+  const bool IsPowerPC64 = getTriple().getArch() == llvm::Triple::ppc64 ||
+                           getTriple().getArch() == llvm::Triple::ppc64le;
   SanitizerMask Res = ToolChain::getSupportedSanitizers();
   Res |= SanitizerKind::Address;
   Res |= SanitizerKind::KernelAddress;
@@ -3654,9 +3597,10 @@ SanitizerMask Linux::getSupportedSanitizers() const {
   if (IsX86_64 || IsMIPS64) {
     Res |= SanitizerKind::DataFlow;
     Res |= SanitizerKind::Leak;
-    Res |= SanitizerKind::Memory;
     Res |= SanitizerKind::Thread;
   }
+  if (IsX86_64 || IsMIPS64 || IsPowerPC64)
+    Res |= SanitizerKind::Memory;
   if (IsX86 || IsX86_64) {
     Res |= SanitizerKind::Function;
     Res |= SanitizerKind::SafeStack;
@@ -3666,8 +3610,9 @@ SanitizerMask Linux::getSupportedSanitizers() const {
 
 /// DragonFly - DragonFly tool chain which can call as(1) and ld(1) directly.
 
-DragonFly::DragonFly(const Driver &D, const llvm::Triple& Triple, const ArgList &Args)
-  : Generic_ELF(D, Triple, Args) {
+DragonFly::DragonFly(const Driver &D, const llvm::Triple &Triple,
+                     const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
 
   // Path mangling to find libexec
   getProgramPaths().push_back(getDriver().getInstalledDir());
@@ -3683,47 +3628,34 @@ DragonFly::DragonFly(const Driver &D, const llvm::Triple& Triple, const ArgList
 }
 
 Tool *DragonFly::buildAssembler() const {
-  return new tools::dragonfly::Assemble(*this);
+  return new tools::dragonfly::Assembler(*this);
 }
 
 Tool *DragonFly::buildLinker() const {
-  return new tools::dragonfly::Link(*this);
+  return new tools::dragonfly::Linker(*this);
 }
 
-
 /// XCore tool chain
-XCore::XCore(const Driver &D, const llvm::Triple &Triple,
-             const ArgList &Args) : ToolChain(D, Triple, Args) {
+XCore::XCore(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : ToolChain(D, Triple, Args) {
   // ProgramPaths are found via 'PATH' environment variable.
 }
 
 Tool *XCore::buildAssembler() const {
-  return new tools::XCore::Assemble(*this);
+  return new tools::XCore::Assembler(*this);
 }
 
-Tool *XCore::buildLinker() const {
-  return new tools::XCore::Link(*this);
-}
+Tool *XCore::buildLinker() const { return new tools::XCore::Linker(*this); }
 
-bool XCore::isPICDefault() const {
-  return false;
-}
+bool XCore::isPICDefault() const { return false; }
 
-bool XCore::isPIEDefault() const {
-  return false;
-}
+bool XCore::isPIEDefault() const { return false; }
 
-bool XCore::isPICDefaultForced() const {
-  return false;
-}
+bool XCore::isPICDefaultForced() const { return false; }
 
-bool XCore::SupportsProfiling() const {
-  return false;
-}
+bool XCore::SupportsProfiling() const { return false; }
 
-bool XCore::hasBlocksRuntime() const {
-  return false;
-}
+bool XCore::hasBlocksRuntime() const { return false; }
 
 void XCore::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
                                       ArgStringList &CC1Args) const {
@@ -3732,7 +3664,7 @@ void XCore::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
     return;
   if (const char *cl_include_dir = getenv("XCC_C_INCLUDE_PATH")) {
     SmallVector<StringRef, 4> Dirs;
-    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,'\0'};
+    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
     StringRef(cl_include_dir).split(Dirs, StringRef(EnvPathSeparatorStr));
     ArrayRef<StringRef> DirVec(Dirs);
     addSystemIncludes(DriverArgs, CC1Args, DirVec);
@@ -3752,7 +3684,7 @@ void XCore::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
     return;
   if (const char *cl_include_dir = getenv("XCC_CPLUS_INCLUDE_PATH")) {
     SmallVector<StringRef, 4> Dirs;
-    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,'\0'};
+    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
     StringRef(cl_include_dir).split(Dirs, StringRef(EnvPathSeparatorStr));
     ArrayRef<StringRef> DirVec(Dirs);
     addSystemIncludes(DriverArgs, CC1Args, DirVec);
@@ -3770,11 +3702,11 @@ Tool *SHAVEToolChain::SelectTool(const JobAction &JA) const {
   switch (JA.getKind()) {
   case Action::CompileJobClass:
     if (!Compiler)
-      Compiler.reset(new tools::SHAVE::Compile(*this));
+      Compiler.reset(new tools::SHAVE::Compiler(*this));
     return Compiler.get();
   case Action::AssembleJobClass:
     if (!Assembler)
-      Assembler.reset(new tools::SHAVE::Assemble(*this));
+      Assembler.reset(new tools::SHAVE::Assembler(*this));
     return Assembler.get();
   default:
     return ToolChain::getTool(JA.getKind());
@@ -3782,7 +3714,7 @@ Tool *SHAVEToolChain::SelectTool(const JobAction &JA) const {
 }
 
 SHAVEToolChain::SHAVEToolChain(const Driver &D, const llvm::Triple &Triple,
-                     const ArgList &Args)
+                               const ArgList &Args)
     : Generic_GCC(D, Triple, Args) {}
 
 SHAVEToolChain::~SHAVEToolChain() {}
diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h
index a48bb5f..629f90a 100644
--- a/lib/Driver/ToolChains.h
+++ b/lib/Driver/ToolChains.h
@@ -101,7 +101,7 @@ protected:
   public:
     GCCInstallationDetector() : IsValid(false) {}
     void init(const Driver &D, const llvm::Triple &TargetTriple,
-                            const llvm::opt::ArgList &Args);
+              const llvm::opt::ArgList &Args);
 
     /// \brief Check whether we detected a valid GCC install.
     bool isValid() const { return IsValid; }
@@ -179,8 +179,8 @@ protected:
   /// @}
 
 private:
-  mutable std::unique_ptr<tools::gcc::Preprocess> Preprocess;
-  mutable std::unique_ptr<tools::gcc::Compile> Compile;
+  mutable std::unique_ptr<tools::gcc::Preprocessor> Preprocess;
+  mutable std::unique_ptr<tools::gcc::Compiler> Compile;
 };
 
 class LLVM_LIBRARY_VISIBILITY MachO : public ToolChain {
@@ -188,6 +188,7 @@ protected:
   Tool *buildAssembler() const override;
   Tool *buildLinker() const override;
   Tool *getTool(Action::ActionClass AC) const override;
+
 private:
   mutable std::unique_ptr<tools::darwin::Lipo> Lipo;
   mutable std::unique_ptr<tools::darwin::Dsymutil> Dsymutil;
@@ -195,7 +196,7 @@ private:
 
 public:
   MachO(const Driver &D, const llvm::Triple &Triple,
-             const llvm::opt::ArgList &Args);
+        const llvm::opt::ArgList &Args);
   ~MachO() override;
 
   /// @name MachO specific toolchain API
@@ -205,7 +206,6 @@ public:
   /// example, Apple treats different ARM variations as distinct architectures.
   StringRef getMachOArchName(const llvm::opt::ArgList &Args) const;
 
-
   /// Add the linker arguments to link the ARC runtime library.
   virtual void AddLinkARCArgs(const llvm::opt::ArgList &Args,
                               llvm::opt::ArgStringList &CmdArgs) const {}
@@ -214,30 +214,24 @@ public:
   virtual void AddLinkRuntimeLibArgs(const llvm::opt::ArgList &Args,
                                      llvm::opt::ArgStringList &CmdArgs) const;
 
-  virtual void
-  addStartObjectFileArgs(const llvm::opt::ArgList &Args,
-                         llvm::opt::ArgStringList &CmdArgs) const {}
+  virtual void addStartObjectFileArgs(const llvm::opt::ArgList &Args,
+                                      llvm::opt::ArgStringList &CmdArgs) const {
+  }
 
   virtual void addMinVersionArgs(const llvm::opt::ArgList &Args,
                                  llvm::opt::ArgStringList &CmdArgs) const {}
 
   /// On some iOS platforms, kernel and kernel modules were built statically. Is
   /// this such a target?
-  virtual bool isKernelStatic() const {
-    return false;
-  }
+  virtual bool isKernelStatic() const { return false; }
 
   /// Is the target either iOS or an iOS simulator?
-  bool isTargetIOSBased() const {
-    return false;
-  }
+  bool isTargetIOSBased() const { return false; }
 
   void AddLinkRuntimeLib(const llvm::opt::ArgList &Args,
                          llvm::opt::ArgStringList &CmdArgs,
-                         StringRef DarwinLibName,
-                         bool AlwaysLink = false,
-                         bool IsEmbedded = false,
-                         bool AddRPath = false) const;
+                         StringRef DarwinLibName, bool AlwaysLink = false,
+                         bool IsEmbedded = false, bool AddRPath = false) const;
 
   /// Add any profiling runtime libraries that are needed. This is essentially a
   /// MachO specific version of addProfileRT in Tools.cpp.
@@ -271,22 +265,16 @@ public:
     return true;
   }
 
-  bool IsMathErrnoDefault() const override {
-    return false;
-  }
+  bool IsMathErrnoDefault() const override { return false; }
 
-  bool IsEncodeExtendedBlockSignatureDefault() const override {
-    return true;
-  }
+  bool IsEncodeExtendedBlockSignatureDefault() const override { return true; }
 
   bool IsObjCNonFragileABIDefault() const override {
     // Non-fragile ABI is default for everything but i386.
     return getTriple().getArch() != llvm::Triple::x86;
   }
 
-  bool UseObjCMixedDispatch() const override {
-    return true;
-  }
+  bool UseObjCMixedDispatch() const override { return true; }
 
   bool IsUnwindTablesDefault() const override;
 
@@ -300,20 +288,16 @@ public:
 
   bool SupportsProfiling() const override;
 
-  bool SupportsObjCGC() const override {
-    return false;
-  }
+  bool SupportsObjCGC() const override { return false; }
 
   bool UseDwarfDebugFlags() const override;
 
-  bool UseSjLjExceptions() const override {
-    return false;
-  }
+  bool UseSjLjExceptions() const override { return false; }
 
   /// }
 };
 
-  /// Darwin - The base Darwin tool chain.
+/// Darwin - The base Darwin tool chain.
 class LLVM_LIBRARY_VISIBILITY Darwin : public MachO {
 public:
   /// Whether the information on the target has been initialized.
@@ -323,11 +307,7 @@ public:
   // the argument translation business.
   mutable bool TargetInitialized;
 
-  enum DarwinPlatformKind {
-    MacOS,
-    IPhoneOS,
-    IPhoneOSSimulator
-  };
+  enum DarwinPlatformKind { MacOS, IPhoneOS, IPhoneOSSimulator };
 
   mutable DarwinPlatformKind TargetPlatform;
 
@@ -348,14 +328,12 @@ public:
   /// @name Apple Specific Toolchain Implementation
   /// {
 
-  void
-  addMinVersionArgs(const llvm::opt::ArgList &Args,
-                    llvm::opt::ArgStringList &CmdArgs) const override;
-
-  void
-  addStartObjectFileArgs(const llvm::opt::ArgList &Args,
+  void addMinVersionArgs(const llvm::opt::ArgList &Args,
                          llvm::opt::ArgStringList &CmdArgs) const override;
 
+  void addStartObjectFileArgs(const llvm::opt::ArgList &Args,
+                              llvm::opt::ArgStringList &CmdArgs) const override;
+
   bool isKernelStatic() const override {
     return !isTargetIPhoneOS() || isIPhoneOSVersionLT(6, 0);
   }
@@ -411,12 +389,13 @@ protected:
     return TargetVersion;
   }
 
-  bool isIPhoneOSVersionLT(unsigned V0, unsigned V1=0, unsigned V2=0) const {
+  bool isIPhoneOSVersionLT(unsigned V0, unsigned V1 = 0,
+                           unsigned V2 = 0) const {
     assert(isTargetIOSBased() && "Unexpected call for non iOS target!");
     return TargetVersion < VersionTuple(V0, V1, V2);
   }
 
-  bool isMacosxVersionLT(unsigned V0, unsigned V1=0, unsigned V2=0) const {
+  bool isMacosxVersionLT(unsigned V0, unsigned V1 = 0, unsigned V2 = 0) const {
     assert(isTargetMacOS() && "Unexpected call for non OS X target!");
     return TargetVersion < VersionTuple(V0, V1, V2);
   }
@@ -476,23 +455,19 @@ public:
   /// @name Apple ToolChain Implementation
   /// {
 
-  void
-  AddLinkRuntimeLibArgs(const llvm::opt::ArgList &Args,
-                        llvm::opt::ArgStringList &CmdArgs) const override;
+  void AddLinkRuntimeLibArgs(const llvm::opt::ArgList &Args,
+                             llvm::opt::ArgStringList &CmdArgs) const override;
 
-  void
-  AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
-                      llvm::opt::ArgStringList &CmdArgs) const override;
+  void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
+                           llvm::opt::ArgStringList &CmdArgs) const override;
 
-  void
-  AddCCKextLibArgs(const llvm::opt::ArgList &Args,
-                   llvm::opt::ArgStringList &CmdArgs) const override;
+  void AddCCKextLibArgs(const llvm::opt::ArgList &Args,
+                        llvm::opt::ArgStringList &CmdArgs) const override;
 
   void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override;
 
-  void
-  AddLinkARCArgs(const llvm::opt::ArgList &Args,
-                 llvm::opt::ArgStringList &CmdArgs) const override;
+  void AddLinkARCArgs(const llvm::opt::ArgList &Args,
+                      llvm::opt::ArgStringList &CmdArgs) const override;
   /// }
 
 private:
@@ -503,6 +478,7 @@ private:
 
 class LLVM_LIBRARY_VISIBILITY Generic_ELF : public Generic_GCC {
   virtual void anchor();
+
 public:
   Generic_ELF(const Driver &D, const llvm::Triple &Triple,
               const llvm::opt::ArgList &Args)
@@ -521,8 +497,8 @@ public:
   bool IsMathErrnoDefault() const override { return false; }
   bool IsObjCNonFragileABIDefault() const override { return true; }
 
-  CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args)
-      const override {
+  CXXStdlibType
+  GetCXXStdlibType(const llvm::opt::ArgList &Args) const override {
     return ToolChain::CST_Libcxx;
   }
   void AddClangCXXStdlibIncludeArgs(
@@ -543,12 +519,43 @@ public:
           const llvm::opt::ArgList &Args);
 
   bool IsIntegratedAssemblerDefault() const override { return true; }
+
 protected:
   Tool *buildAssembler() const override;
   Tool *buildLinker() const override;
-
 };
 
+class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain {
+public:
+  MinGW(const Driver &D, const llvm::Triple &Triple,
+        const llvm::opt::ArgList &Args);
+
+  bool IsIntegratedAssemblerDefault() const override;
+  bool IsUnwindTablesDefault() const override;
+  bool isPICDefault() const override;
+  bool isPIEDefault() const override;
+  bool isPICDefaultForced() const override;
+  bool UseSEHExceptions() const;
+
+  void
+  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                            llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
+
+protected:
+  Tool *getTool(Action::ActionClass AC) const override;
+  Tool *buildLinker() const override;
+  Tool *buildAssembler() const override;
+
+private:
+  std::string Base;
+  std::string GccLibDir;
+  std::string Arch;
+  mutable std::unique_ptr<tools::gcc::Preprocessor> Preprocessor;
+  mutable std::unique_ptr<tools::gcc::Compiler> Compiler;
+};
 
 class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF {
 public:
@@ -577,13 +584,13 @@ public:
   bool IsObjCNonFragileABIDefault() const override { return true; }
 
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
-  void
-  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                              llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
   void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) const override;
   unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const override {
-     return 1;
+    return 1;
   }
 
 protected:
@@ -601,13 +608,14 @@ public:
   bool IsObjCNonFragileABIDefault() const override { return true; }
 
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
-  void
-  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                               llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
 
   bool UseSjLjExceptions() const override;
   bool isPIEDefault() const override;
   SanitizerMask getSupportedSanitizers() const override;
+
 protected:
   Tool *buildAssembler() const override;
   Tool *buildLinker() const override;
@@ -623,12 +631,10 @@ public:
 
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
 
-  void
-  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                              llvm::opt::ArgStringList &CC1Args) const override;
-  bool IsUnwindTablesDefault() const override {
-    return true;
-  }
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
+  bool IsUnwindTablesDefault() const override { return true; }
 
 protected:
   Tool *buildAssembler() const override;
@@ -667,9 +673,9 @@ public:
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
-  void
-  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                               llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
   bool isPIEDefault() const override;
   SanitizerMask getSupportedSanitizers() const override;
 
@@ -706,9 +712,9 @@ public:
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
-  void
-  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                              llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
 
   StringRef GetGCCLibAndIncVersion() const { return GCCLibAndIncVersion.Text; }
@@ -720,7 +726,7 @@ public:
 
   static const char *GetSmallDataThreshold(const llvm::opt::ArgList &Args);
 
-  static bool UsesG0(const char* smallDataThreshold);
+  static bool UsesG0(const char *smallDataThreshold);
 };
 
 class LLVM_LIBRARY_VISIBILITY NaCl_TC : public Generic_ELF {
@@ -731,19 +737,16 @@ public:
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
-  void
-  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                               llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
 
-  CXXStdlibType
-  GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+  CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
 
-  void
-  AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
-                      llvm::opt::ArgStringList &CmdArgs) const override;
+  void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
+                           llvm::opt::ArgStringList &CmdArgs) const override;
 
-  bool
-  IsIntegratedAssemblerDefault() const override { return false; }
+  bool IsIntegratedAssemblerDefault() const override { return false; }
 
   // Get the path to the file containing NaCl's ARM macros. It lives in NaCl_TC
   // because the AssembleARM tool needs a const char * that it can pass around
@@ -790,9 +793,9 @@ public:
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
-  void
-  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                               llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
 
   bool getWindowsSDKDir(std::string &path, int &major, int &minor) const;
   bool getWindowsSDKLibraryPath(std::string &path) const;
@@ -829,12 +832,12 @@ public:
     return 0;
   }
 
-  void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                                 llvm::opt::ArgStringList &CC1Args)
-      const override;
-  void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                                    llvm::opt::ArgStringList &CC1Args)
-      const override;
+  void
+  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                            llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
   void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) const override;
 
@@ -846,22 +849,26 @@ protected:
 class LLVM_LIBRARY_VISIBILITY XCore : public ToolChain {
 public:
   XCore(const Driver &D, const llvm::Triple &Triple,
-          const llvm::opt::ArgList &Args);
+        const llvm::opt::ArgList &Args);
+
 protected:
   Tool *buildAssembler() const override;
   Tool *buildLinker() const override;
+
 public:
   bool isPICDefault() const override;
   bool isPIEDefault() const override;
   bool isPICDefaultForced() const override;
   bool SupportsProfiling() const override;
   bool hasBlocksRuntime() const override;
-  void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                    llvm::opt::ArgStringList &CC1Args) const override;
+  void
+  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                            llvm::opt::ArgStringList &CC1Args) const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
                              llvm::opt::ArgStringList &CC1Args) const override;
-  void AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                       llvm::opt::ArgStringList &CC1Args) const override;
+  void AddClangCXXStdlibIncludeArgs(
+      const llvm::opt::ArgList &DriverArgs,
+      llvm::opt::ArgStringList &CC1Args) const override;
   void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) const override;
 };
@@ -872,7 +879,7 @@ public:
 class LLVM_LIBRARY_VISIBILITY SHAVEToolChain : public Generic_GCC {
 public:
   SHAVEToolChain(const Driver &D, const llvm::Triple &Triple,
-            const llvm::opt::ArgList &Args);
+                 const llvm::opt::ArgList &Args);
   ~SHAVEToolChain() override;
 
   virtual Tool *SelectTool(const JobAction &JA) const override;
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp
index 2367914..a2955db 100644
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -84,10 +84,10 @@ static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
 static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
   // In gcc, only ARM checks this, but it seems reasonable to check universally.
   if (Args.hasArg(options::OPT_static))
-    if (const Arg *A = Args.getLastArg(options::OPT_dynamic,
-                                       options::OPT_mdynamic_no_pic))
-      D.Diag(diag::err_drv_argument_not_allowed_with)
-        << A->getAsString(Args) << "-static";
+    if (const Arg *A =
+            Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
+      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
+                                                      << "-static";
 }
 
 // Add backslashes to escape spaces and other backslashes.
@@ -95,9 +95,10 @@ static void CheckCodeGenerationOptions(const Driver &D, const ArgList &Args) {
 // the -dwarf-debug-flags option.
 static void EscapeSpacesAndBackslashes(const char *Arg,
                                        SmallVectorImpl<char> &Res) {
-  for ( ; *Arg; ++Arg) {
+  for (; *Arg; ++Arg) {
     switch (*Arg) {
-    default: break;
+    default:
+      break;
     case ' ':
     case '\\':
       Res.push_back('\\');
@@ -109,8 +110,7 @@ static void EscapeSpacesAndBackslashes(const char *Arg,
 
 // Quote target names for inclusion in GNU Make dependency files.
 // Only the characters '$', '#', ' ', '\t' are quoted.
-static void QuoteTarget(StringRef Target,
-                        SmallVectorImpl<char> &Res) {
+static void QuoteTarget(StringRef Target, SmallVectorImpl<char> &Res) {
   for (unsigned i = 0, e = Target.size(); i != e; ++i) {
     switch (Target[i]) {
     case ' ':
@@ -136,10 +136,8 @@ static void QuoteTarget(StringRef Target,
   }
 }
 
-static void addDirectoryList(const ArgList &Args,
-                             ArgStringList &CmdArgs,
-                             const char *ArgName,
-                             const char *EnvVar) {
+static void addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs,
+                             const char *ArgName, const char *EnvVar) {
   const char *DirList = ::getenv(EnvVar);
   bool CombinedArg = false;
 
@@ -165,7 +163,8 @@ static void addDirectoryList(const ArgList &Args,
       }
     } else {
       if (CombinedArg) {
-        CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim)));
+        CmdArgs.push_back(
+            Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim)));
       } else {
         CmdArgs.push_back(ArgName);
         CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim)));
@@ -191,9 +190,8 @@ static void addDirectoryList(const ArgList &Args,
   }
 }
 
-static void AddLinkerInputs(const ToolChain &TC,
-                            const InputInfoList &Inputs, const ArgList &Args,
-                            ArgStringList &CmdArgs) {
+static void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
+                            const ArgList &Args, ArgStringList &CmdArgs) {
   const Driver &D = TC.getDriver();
 
   // Add extra linker input arguments which are not treated as inputs
@@ -205,10 +203,8 @@ static void AddLinkerInputs(const ToolChain &TC,
       // Don't try to pass LLVM inputs unless we have native support.
       if (II.getType() == types::TY_LLVM_IR ||
           II.getType() == types::TY_LTO_IR ||
-          II.getType() == types::TY_LLVM_BC ||
-          II.getType() == types::TY_LTO_BC)
-        D.Diag(diag::err_drv_no_linker_llvm_support)
-          << TC.getTripleString();
+          II.getType() == types::TY_LLVM_BC || II.getType() == types::TY_LTO_BC)
+        D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString();
     }
 
     // Add filenames immediately.
@@ -230,7 +226,7 @@ static void AddLinkerInputs(const ToolChain &TC,
       A.claim();
       A.render(Args, CmdArgs);
     } else {
-       A.renderAsInput(Args, CmdArgs);
+      A.renderAsInput(Args, CmdArgs);
     }
   }
 
@@ -259,14 +255,11 @@ static bool forwardToGCC(const Option &O) {
   // Don't forward inputs from the original command line.  They are added from
   // InputInfoList.
   return O.getKind() != Option::InputClass &&
-         !O.hasFlag(options::DriverOption) &&
-         !O.hasFlag(options::LinkerInput);
+         !O.hasFlag(options::DriverOption) && !O.hasFlag(options::LinkerInput);
 }
 
-void Clang::AddPreprocessingOptions(Compilation &C,
-                                    const JobAction &JA,
-                                    const Driver &D,
-                                    const ArgList &Args,
+void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
+                                    const Driver &D, const ArgList &Args,
                                     ArgStringList &CmdArgs,
                                     const InputInfo &Output,
                                     const InputInfoList &Inputs) const {
@@ -333,7 +326,7 @@ void Clang::AddPreprocessingOptions(Compilation &C,
 
   if (Args.hasArg(options::OPT_MG)) {
     if (!A || A->getOption().matches(options::OPT_MD) ||
-              A->getOption().matches(options::OPT_MMD))
+        A->getOption().matches(options::OPT_MMD))
       D.Diag(diag::err_drv_mg_requires_m_or_mm);
     CmdArgs.push_back("-MG");
   }
@@ -351,7 +344,7 @@ void Clang::AddPreprocessingOptions(Compilation &C,
       QuoteTarget(A->getValue(), Quoted);
       CmdArgs.push_back(Args.MakeArgString(Quoted));
 
-    // -MT flag - no change
+      // -MT flag - no change
     } else {
       A->render(Args, CmdArgs);
     }
@@ -408,8 +401,8 @@ void Clang::AddPreprocessingOptions(Compilation &C,
           continue;
         } else {
           // Ignore the PCH if not first on command line and emit warning.
-          D.Diag(diag::warn_drv_pch_not_first_include)
-              << P << A->getAsString(Args);
+          D.Diag(diag::warn_drv_pch_not_first_include) << P
+                                                       << A->getAsString(Args);
         }
       }
     }
@@ -511,8 +504,8 @@ static bool isNoCommonDefault(const llvm::Triple &Triple) {
 
 // Handle -mhwdiv=.
 static void getARMHWDivFeatures(const Driver &D, const Arg *A,
-                              const ArgList &Args,
-                              std::vector<const char *> &Features) {
+                                const ArgList &Args,
+                                std::vector<const char *> &Features) {
   StringRef HWDiv = A->getValue();
   if (HWDiv == "arm") {
     Features.push_back("+hwdiv-arm");
@@ -556,9 +549,9 @@ static bool isARMMProfile(const llvm::Triple &Triple) {
 StringRef tools::arm::getARMFloatABI(const Driver &D, const ArgList &Args,
                                      const llvm::Triple &Triple) {
   StringRef FloatABI;
-  if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
-                               options::OPT_mhard_float,
-                               options::OPT_mfloat_abi_EQ)) {
+  if (Arg *A =
+          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
+                          options::OPT_mfloat_abi_EQ)) {
     if (A->getOption().matches(options::OPT_msoft_float))
       FloatABI = "soft";
     else if (A->getOption().matches(options::OPT_mhard_float))
@@ -566,8 +559,7 @@ StringRef tools::arm::getARMFloatABI(const Driver &D, const ArgList &Args,
     else {
       FloatABI = A->getValue();
       if (FloatABI != "soft" && FloatABI != "softfp" && FloatABI != "hard") {
-        D.Diag(diag::err_drv_invalid_mfloat_abi)
-          << A->getAsString(Args);
+        D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
         FloatABI = "soft";
       }
     }
@@ -595,7 +587,7 @@ StringRef tools::arm::getARMFloatABI(const Driver &D, const ArgList &Args,
       break;
 
     case llvm::Triple::FreeBSD:
-      switch(Triple.getEnvironment()) {
+      switch (Triple.getEnvironment()) {
       case llvm::Triple::GNUEABIHF:
         FloatABI = "hard";
         break;
@@ -607,7 +599,7 @@ StringRef tools::arm::getARMFloatABI(const Driver &D, const ArgList &Args,
       break;
 
     default:
-      switch(Triple.getEnvironment()) {
+      switch (Triple.getEnvironment()) {
       case llvm::Triple::GNUEABIHF:
         FloatABI = "hard";
         break;
@@ -715,8 +707,7 @@ static void getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   }
 }
 
-void Clang::AddARMTargetArgs(const ArgList &Args,
-                             ArgStringList &CmdArgs,
+void Clang::AddARMTargetArgs(const ArgList &Args, ArgStringList &CmdArgs,
                              bool KernelOrKext) const {
   const Driver &D = getToolChain().getDriver();
   // Get the effective triple, which takes into account the deployment target.
@@ -734,8 +725,7 @@ void Clang::AddARMTargetArgs(const ArgList &Args,
     // The backend is hardwired to assume AAPCS for M-class processors, ensure
     // the frontend matches that.
     if (Triple.getEnvironment() == llvm::Triple::EABI ||
-        Triple.getOS() == llvm::Triple::UnknownOS ||
-        isARMMProfile(Triple)) {
+        Triple.getOS() == llvm::Triple::UnknownOS || isARMMProfile(Triple)) {
       ABIName = "aapcs";
     } else {
       ABIName = "apcs-gnu";
@@ -745,7 +735,7 @@ void Clang::AddARMTargetArgs(const ArgList &Args,
     ABIName = "aapcs";
   } else {
     // Select the default based on the platform.
-    switch(Triple.getEnvironment()) {
+    switch (Triple.getEnvironment()) {
     case llvm::Triple::Android:
     case llvm::Triple::GNUEABI:
     case llvm::Triple::GNUEABIHF:
@@ -827,8 +817,7 @@ void Clang::AddARMTargetArgs(const ArgList &Args,
   }
 
   if (!Args.hasFlag(options::OPT_mimplicit_float,
-                    options::OPT_mno_implicit_float,
-                    true))
+                    options::OPT_mno_implicit_float, true))
     CmdArgs.push_back("-no-implicit-float");
 
   // llvm does not support reserving registers in general. There is support
@@ -932,10 +921,8 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
 
 // Get CPU and ABI names. They are not independent
 // so we have to calculate them together.
-void mips::getMipsCPUAndABI(const ArgList &Args,
-                            const llvm::Triple &Triple,
-                            StringRef &CPUName,
-                            StringRef &ABIName) {
+void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple,
+                            StringRef &CPUName, StringRef &ABIName) {
   const char *DefMips32CPU = "mips32r2";
   const char *DefMips64CPU = "mips64r2";
 
@@ -951,8 +938,7 @@ void mips::getMipsCPUAndABI(const ArgList &Args,
   if (Triple.getOS() == llvm::Triple::OpenBSD)
     DefMips64CPU = "mips3";
 
-  if (Arg *A = Args.getLastArg(options::OPT_march_EQ,
-                               options::OPT_mcpu_EQ))
+  if (Arg *A = Args.getLastArg(options::OPT_march_EQ, options::OPT_mcpu_EQ))
     CPUName = A->getValue();
 
   if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
@@ -960,9 +946,9 @@ void mips::getMipsCPUAndABI(const ArgList &Args,
     // Convert a GNU style Mips ABI name to the name
     // accepted by LLVM Mips backend.
     ABIName = llvm::StringSwitch<llvm::StringRef>(ABIName)
-      .Case("32", "o32")
-      .Case("64", "n64")
-      .Default(ABIName);
+                  .Case("32", "o32")
+                  .Case("64", "n64")
+                  .Default(ABIName);
   }
 
   // Setup default CPU and ABI names.
@@ -993,9 +979,9 @@ void mips::getMipsCPUAndABI(const ArgList &Args,
   if (CPUName.empty()) {
     // Deduce CPU name from ABI name.
     CPUName = llvm::StringSwitch<const char *>(ABIName)
-      .Cases("o32", "eabi", DefMips32CPU)
-      .Cases("n32", "n64", DefMips64CPU)
-      .Default("");
+                  .Cases("o32", "eabi", DefMips32CPU)
+                  .Cases("n32", "n64", DefMips64CPU)
+                  .Default("");
   }
 
   // FIXME: Warn on inconsistent use of -march and -mabi.
@@ -1004,18 +990,18 @@ void mips::getMipsCPUAndABI(const ArgList &Args,
 // Convert ABI name to the GNU tools acceptable variant.
 static StringRef getGnuCompatibleMipsABIName(StringRef ABI) {
   return llvm::StringSwitch<llvm::StringRef>(ABI)
-    .Case("o32", "32")
-    .Case("n64", "64")
-    .Default(ABI);
+      .Case("o32", "32")
+      .Case("n64", "64")
+      .Default(ABI);
 }
 
 // Select the MIPS float ABI as determined by -msoft-float, -mhard-float,
 // and -mfloat-abi=.
 static StringRef getMipsFloatABI(const Driver &D, const ArgList &Args) {
   StringRef FloatABI;
-  if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
-                               options::OPT_mhard_float,
-                               options::OPT_mfloat_abi_EQ)) {
+  if (Arg *A =
+          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
+                          options::OPT_mfloat_abi_EQ)) {
     if (A->getOption().matches(options::OPT_msoft_float))
       FloatABI = "soft";
     else if (A->getOption().matches(options::OPT_mhard_float))
@@ -1143,8 +1129,7 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args,
     CmdArgs.push_back("-msoft-float");
     CmdArgs.push_back("-mfloat-abi");
     CmdArgs.push_back("soft");
-  }
-  else {
+  } else {
     // Floating point operations and argument passing are hard.
     assert(FloatABI == "hard" && "Invalid float abi!");
     CmdArgs.push_back("-mfloat-abi");
@@ -1196,51 +1181,51 @@ static std::string getPPCTargetCPU(const ArgList &Args) {
     }
 
     return llvm::StringSwitch<const char *>(CPUName)
-      .Case("common", "generic")
-      .Case("440", "440")
-      .Case("440fp", "440")
-      .Case("450", "450")
-      .Case("601", "601")
-      .Case("602", "602")
-      .Case("603", "603")
-      .Case("603e", "603e")
-      .Case("603ev", "603ev")
-      .Case("604", "604")
-      .Case("604e", "604e")
-      .Case("620", "620")
-      .Case("630", "pwr3")
-      .Case("G3", "g3")
-      .Case("7400", "7400")
-      .Case("G4", "g4")
-      .Case("7450", "7450")
-      .Case("G4+", "g4+")
-      .Case("750", "750")
-      .Case("970", "970")
-      .Case("G5", "g5")
-      .Case("a2", "a2")
-      .Case("a2q", "a2q")
-      .Case("e500mc", "e500mc")
-      .Case("e5500", "e5500")
-      .Case("power3", "pwr3")
-      .Case("power4", "pwr4")
-      .Case("power5", "pwr5")
-      .Case("power5x", "pwr5x")
-      .Case("power6", "pwr6")
-      .Case("power6x", "pwr6x")
-      .Case("power7", "pwr7")
-      .Case("power8", "pwr8")
-      .Case("pwr3", "pwr3")
-      .Case("pwr4", "pwr4")
-      .Case("pwr5", "pwr5")
-      .Case("pwr5x", "pwr5x")
-      .Case("pwr6", "pwr6")
-      .Case("pwr6x", "pwr6x")
-      .Case("pwr7", "pwr7")
-      .Case("pwr8", "pwr8")
-      .Case("powerpc", "ppc")
-      .Case("powerpc64", "ppc64")
-      .Case("powerpc64le", "ppc64le")
-      .Default("");
+        .Case("common", "generic")
+        .Case("440", "440")
+        .Case("440fp", "440")
+        .Case("450", "450")
+        .Case("601", "601")
+        .Case("602", "602")
+        .Case("603", "603")
+        .Case("603e", "603e")
+        .Case("603ev", "603ev")
+        .Case("604", "604")
+        .Case("604e", "604e")
+        .Case("620", "620")
+        .Case("630", "pwr3")
+        .Case("G3", "g3")
+        .Case("7400", "7400")
+        .Case("G4", "g4")
+        .Case("7450", "7450")
+        .Case("G4+", "g4+")
+        .Case("750", "750")
+        .Case("970", "970")
+        .Case("G5", "g5")
+        .Case("a2", "a2")
+        .Case("a2q", "a2q")
+        .Case("e500mc", "e500mc")
+        .Case("e5500", "e5500")
+        .Case("power3", "pwr3")
+        .Case("power4", "pwr4")
+        .Case("power5", "pwr5")
+        .Case("power5x", "pwr5x")
+        .Case("power6", "pwr6")
+        .Case("power6x", "pwr6x")
+        .Case("power7", "pwr7")
+        .Case("power8", "pwr8")
+        .Case("pwr3", "pwr3")
+        .Case("pwr4", "pwr4")
+        .Case("pwr5", "pwr5")
+        .Case("pwr5x", "pwr5x")
+        .Case("pwr6", "pwr6")
+        .Case("pwr6x", "pwr6x")
+        .Case("pwr7", "pwr7")
+        .Case("pwr8", "pwr8")
+        .Case("powerpc", "ppc")
+        .Case("powerpc64", "ppc64")
+        .Case("powerpc64le", "ppc64le")
+        .Default("");
   }
 
   return "";
@@ -1282,7 +1267,7 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
   if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
     ABIName = A->getValue();
   } else if (getToolChain().getTriple().isOSLinux())
-    switch(getToolChain().getArch()) {
+    switch (getToolChain().getArch()) {
     case llvm::Triple::ppc64: {
       // When targeting a processor that supports QPX, or if QPX is
       // specifically enabled, default to using the ABI that supports QPX (so
@@ -1304,7 +1289,7 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
       break;
     default:
       break;
-  }
+    }
 
   if (ABIName) {
     CmdArgs.push_back("-target-abi");
@@ -1322,20 +1307,20 @@ static std::string getR600TargetGPU(const ArgList &Args) {
   if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
     const char *GPUName = A->getValue();
     return llvm::StringSwitch<const char *>(GPUName)
-      .Cases("rv630", "rv635", "r600")
-      .Cases("rv610", "rv620", "rs780", "rs880")
-      .Case("rv740", "rv770")
-      .Case("palm", "cedar")
-      .Cases("sumo", "sumo2", "sumo")
-      .Case("hemlock", "cypress")
-      .Case("aruba", "cayman")
-      .Default(GPUName);
+        .Cases("rv630", "rv635", "r600")
+        .Cases("rv610", "rv620", "rs780", "rs880")
+        .Case("rv740", "rv770")
+        .Case("palm", "cedar")
+        .Cases("sumo", "sumo2", "sumo")
+        .Case("hemlock", "cypress")
+        .Case("aruba", "cayman")
+        .Default(GPUName);
   }
   return "";
 }
 
 void Clang::AddSparcTargetArgs(const ArgList &Args,
-                             ArgStringList &CmdArgs) const {
+                               ArgStringList &CmdArgs) const {
   const Driver &D = getToolChain().getDriver();
   std::string Triple = getToolChain().ComputeEffectiveClangTriple(Args);
 
@@ -1352,8 +1337,8 @@ void Clang::AddSparcTargetArgs(const ArgList &Args,
   // currently does not support Sparc soft-float, at all, display an
   // error if it's requested.
   if (SoftFloatABI) {
-    D.Diag(diag::err_drv_unsupported_opt_for_target)
-        << "-msoft-float" << Triple;
+    D.Diag(diag::err_drv_unsupported_opt_for_target) << "-msoft-float"
+                                                     << Triple;
   }
 }
 
@@ -1366,16 +1351,14 @@ static const char *getSystemZTargetCPU(const ArgList &Args) {
 static void getSystemZTargetFeatures(const ArgList &Args,
                                      std::vector<const char *> &Features) {
   // -m(no-)htm overrides use of the transactional-execution facility.
-  if (Arg *A = Args.getLastArg(options::OPT_mhtm,
-                               options::OPT_mno_htm)) {
+  if (Arg *A = Args.getLastArg(options::OPT_mhtm, options::OPT_mno_htm)) {
     if (A->getOption().matches(options::OPT_mhtm))
       Features.push_back("+transactional-execution");
     else
       Features.push_back("-transactional-execution");
   }
   // -m(no-)vx overrides use of the vector facility.
-  if (Arg *A = Args.getLastArg(options::OPT_mvx,
-                               options::OPT_mno_vx)) {
+  if (Arg *A = Args.getLastArg(options::OPT_mvx, options::OPT_mno_vx)) {
     if (A->getOption().matches(options::OPT_mvx))
       Features.push_back("+vector");
     else
@@ -1403,6 +1386,28 @@ static const char *getX86TargetCPU(const ArgList &Args,
       return Args.MakeArgString(CPU);
   }
 
+  if (const Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) {
+    // Mapping built by referring to X86TargetInfo::getDefaultFeatures().
+    StringRef Arch = A->getValue();
+    const char *CPU;
+    if (Triple.getArch() == llvm::Triple::x86) {
+      CPU = llvm::StringSwitch<const char *>(Arch)
+                .Case("IA32", "i386")
+                .Case("SSE", "pentium3")
+                .Case("SSE2", "pentium4")
+                .Case("AVX", "sandybridge")
+                .Case("AVX2", "haswell")
+                .Default(nullptr);
+    } else {
+      CPU = llvm::StringSwitch<const char *>(Arch)
+                .Case("AVX", "sandybridge")
+                .Case("AVX2", "haswell")
+                .Default(nullptr);
+    }
+    if (CPU)
+      return CPU;
+  }
+
   // Select the default CPU if none was given (or detection failed).
 
   if (Triple.getArch() != llvm::Triple::x86_64 &&
@@ -1446,7 +1451,7 @@ static const char *getX86TargetCPU(const ArgList &Args,
 }
 
 static std::string getCPUName(const ArgList &Args, const llvm::Triple &T) {
-  switch(T.getArch()) {
+  switch (T.getArch()) {
   default:
     return "";
 
@@ -1517,7 +1522,8 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
   // as gold requires -plugin to come before any -plugin-opt that -Wl might
   // forward.
   CmdArgs.push_back("-plugin");
-  std::string Plugin = ToolChain.getDriver().Dir + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold.so";
+  std::string Plugin =
+      ToolChain.getDriver().Dir + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold.so";
   CmdArgs.push_back(Args.MakeArgString(Plugin));
 
   // Try to pass driver level flags relevant to LTO code generation down to
@@ -1534,7 +1540,7 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
 /// parsing the refinement step. Otherwise, return true and set the Position
 /// of the refinement step in the input string.
 static bool getRefinementStep(const StringRef &In, const Driver &D,
-                                const Arg &A, size_t &Position) {
+                              const Arg &A, size_t &Position) {
   const char RefinementStepToken = ':';
   Position = In.find(RefinementStepToken);
   if (Position != StringRef::npos) {
@@ -1597,14 +1603,14 @@ static void ParseMRecip(const Driver &D, const ArgList &Args,
   // and pass through.
 
   llvm::StringMap<bool> OptionStrings;
-  OptionStrings.insert(std::make_pair("divd",       false));
-  OptionStrings.insert(std::make_pair("divf",       false));
-  OptionStrings.insert(std::make_pair("vec-divd",   false));
-  OptionStrings.insert(std::make_pair("vec-divf",   false));
-  OptionStrings.insert(std::make_pair("sqrtd",      false));
-  OptionStrings.insert(std::make_pair("sqrtf",      false));
-  OptionStrings.insert(std::make_pair("vec-sqrtd",  false));
-  OptionStrings.insert(std::make_pair("vec-sqrtf",  false));
+  OptionStrings.insert(std::make_pair("divd", false));
+  OptionStrings.insert(std::make_pair("divf", false));
+  OptionStrings.insert(std::make_pair("vec-divd", false));
+  OptionStrings.insert(std::make_pair("vec-divf", false));
+  OptionStrings.insert(std::make_pair("sqrtd", false));
+  OptionStrings.insert(std::make_pair("sqrtf", false));
+  OptionStrings.insert(std::make_pair("vec-sqrtd", false));
+  OptionStrings.insert(std::make_pair("vec-sqrtf", false));
 
   for (unsigned i = 0; i != NumOptions; ++i) {
     StringRef Val = A->getValue(i);
@@ -1636,7 +1642,7 @@ static void ParseMRecip(const Driver &D, const ArgList &Args,
         return;
       }
     }
-    
+
     if (OptionIter->second == true) {
       // Duplicate option specified.
       D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
@@ -1669,8 +1675,8 @@ static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
       llvm::StringMap<bool> HostFeatures;
       if (llvm::sys::getHostCPUFeatures(HostFeatures))
         for (auto &F : HostFeatures)
-          Features.push_back(Args.MakeArgString((F.second ? "+" : "-") +
-                                                F.first()));
+          Features.push_back(
+              Args.MakeArgString((F.second ? "+" : "-") + F.first()));
     }
   }
 
@@ -1739,9 +1745,7 @@ static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
 
 void Clang::AddX86TargetArgs(const ArgList &Args,
                              ArgStringList &CmdArgs) const {
-  if (!Args.hasFlag(options::OPT_mred_zone,
-                    options::OPT_mno_red_zone,
-                    true) ||
+  if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) ||
       Args.hasArg(options::OPT_mkernel) ||
       Args.hasArg(options::OPT_fapple_kext))
     CmdArgs.push_back("-disable-red-zone");
@@ -1750,10 +1754,9 @@ void Clang::AddX86TargetArgs(const ArgList &Args,
   // that to be overridden with -mno-soft-float.
   bool NoImplicitFloat = (Args.hasArg(options::OPT_mkernel) ||
                           Args.hasArg(options::OPT_fapple_kext));
-  if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
-                               options::OPT_mno_soft_float,
-                               options::OPT_mimplicit_float,
-                               options::OPT_mno_implicit_float)) {
+  if (Arg *A = Args.getLastArg(
+          options::OPT_msoft_float, options::OPT_mno_soft_float,
+          options::OPT_mimplicit_float, options::OPT_mno_implicit_float)) {
     const Option &O = A->getOption();
     NoImplicitFloat = (O.matches(options::OPT_mno_implicit_float) ||
                        O.matches(options::OPT_msoft_float));
@@ -1778,21 +1781,21 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args,
   CmdArgs.push_back("-mqdsp6-compat");
   CmdArgs.push_back("-Wreturn-type");
 
-  if (const char* v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args)) {
-    std::string SmallDataThreshold="-hexagon-small-data-threshold=";
+  if (const char *v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args)) {
+    std::string SmallDataThreshold = "-hexagon-small-data-threshold=";
     SmallDataThreshold += v;
-    CmdArgs.push_back ("-mllvm");
+    CmdArgs.push_back("-mllvm");
     CmdArgs.push_back(Args.MakeArgString(SmallDataThreshold));
   }
 
   if (!Args.hasArg(options::OPT_fno_short_enums))
     CmdArgs.push_back("-fshort-enums");
   if (Args.getLastArg(options::OPT_mieee_rnd_near)) {
-    CmdArgs.push_back ("-mllvm");
-    CmdArgs.push_back ("-enable-hexagon-ieee-rnd-near");
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back("-enable-hexagon-ieee-rnd-near");
   }
-  CmdArgs.push_back ("-mllvm");
-  CmdArgs.push_back ("-machine-sink-split=0");
+  CmdArgs.push_back("-mllvm");
+  CmdArgs.push_back("-machine-sink-split=0");
 }
 
 // Decode AArch64 features from string like +[no]featureA+[no]featureB+...
@@ -1801,8 +1804,8 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text,
   SmallVector<StringRef, 8> Split;
   text.split(Split, StringRef("+"), -1, false);
 
-  for (unsigned I = 0, E = Split.size(); I != E; ++I) {
-    const char *result = llvm::StringSwitch<const char *>(Split[I])
+  for (const StringRef Feature : Split) {
+    const char *result = llvm::StringSwitch<const char *>(Feature)
                              .Case("fp", "+fp-armv8")
                              .Case("simd", "+neon")
                              .Case("crc", "+crc")
@@ -1814,7 +1817,7 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text,
                              .Default(nullptr);
     if (result)
       Features.push_back(result);
-    else if (Split[I] == "neon" || Split[I] == "noneon")
+    else if (Feature == "neon" || Feature == "noneon")
       D.Diag(diag::err_drv_no_neon_modifier);
     else
       return false;
@@ -1828,7 +1831,8 @@ static bool DecodeAArch64Mcpu(const Driver &D, StringRef Mcpu, StringRef &CPU,
                               std::vector<const char *> &Features) {
   std::pair<StringRef, StringRef> Split = Mcpu.split("+");
   CPU = Split.first;
-  if (CPU == "cyclone" || CPU == "cortex-a53" || CPU == "cortex-a57" || CPU == "cortex-a72") {
+  if (CPU == "cyclone" || CPU == "cortex-a53" || CPU == "cortex-a57" ||
+      CPU == "cortex-a72") {
     Features.push_back("+neon");
     Features.push_back("+crc");
     Features.push_back("+crypto");
@@ -1851,12 +1855,9 @@ getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March,
   std::string MarchLowerCase = March.lower();
   std::pair<StringRef, StringRef> Split = StringRef(MarchLowerCase).split("+");
 
-  if (Split.first == "armv8-a" ||
-      Split.first == "armv8a") {
+  if (Split.first == "armv8-a" || Split.first == "armv8a") {
     // ok, no additional features.
-  } else if (
-      Split.first == "armv8.1-a" ||
-      Split.first == "armv8.1a" ) {
+  } else if (Split.first == "armv8.1-a" || Split.first == "armv8.1a") {
     Features.push_back("+v8.1a");
   } else {
     return false;
@@ -1941,8 +1942,7 @@ static void getAArch64TargetFeatures(const Driver &D, const ArgList &Args,
   }
 
   // En/disable crc
-  if (Arg *A = Args.getLastArg(options::OPT_mcrc,
-                               options::OPT_mnocrc)) {
+  if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) {
     if (A->getOption().matches(options::OPT_mcrc))
       Features.push_back("+crc");
     else
@@ -2023,7 +2023,7 @@ shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
   if (!Triple.isMacOSX())
     return false;
 
-  return (!Triple.isMacOSXVersionLT(10,5) &&
+  return (!Triple.isMacOSXVersionLT(10, 5) &&
           (Triple.getArch() == llvm::Triple::x86_64 ||
            Triple.getArch() == llvm::Triple::arm));
 }
@@ -2069,8 +2069,7 @@ static void addExceptionArgs(const ArgList &Args, types::ID InputType,
   // is not necessarily sensible, but follows GCC.
   if (types::isObjC(InputType) &&
       Args.hasFlag(options::OPT_fobjc_exceptions,
-                   options::OPT_fno_objc_exceptions,
-                   true)) {
+                   options::OPT_fno_objc_exceptions, true)) {
     CmdArgs.push_back("-fobjc-exceptions");
 
     EH |= shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple);
@@ -2113,8 +2112,7 @@ static void addExceptionArgs(const ArgList &Args, types::ID InputType,
     CmdArgs.push_back("-fexceptions");
 }
 
-static bool ShouldDisableAutolink(const ArgList &Args,
-                             const ToolChain &TC) {
+static bool ShouldDisableAutolink(const ArgList &Args, const ToolChain &TC) {
   bool Default = true;
   if (TC.getTriple().isOSDarwin()) {
     // The native darwin assembler doesn't support the linker_option directives,
@@ -2127,9 +2125,9 @@ static bool ShouldDisableAutolink(const ArgList &Args,
 
 static bool ShouldDisableDwarfDirectory(const ArgList &Args,
                                         const ToolChain &TC) {
-  bool UseDwarfDirectory = Args.hasFlag(options::OPT_fdwarf_directory_asm,
-                                        options::OPT_fno_dwarf_directory_asm,
-                                        TC.useIntegratedAs());
+  bool UseDwarfDirectory =
+      Args.hasFlag(options::OPT_fdwarf_directory_asm,
+                   options::OPT_fno_dwarf_directory_asm, TC.useIntegratedAs());
   return !UseDwarfDirectory;
 }
 
@@ -2164,79 +2162,78 @@ static bool UseRelaxAll(Compilation &C, const ArgList &Args) {
   }
 
   return Args.hasFlag(options::OPT_mrelax_all, options::OPT_mno_relax_all,
-    RelaxDefault);
+                      RelaxDefault);
 }
 
 static void CollectArgsForIntegratedAssembler(Compilation &C,
                                               const ArgList &Args,
                                               ArgStringList &CmdArgs,
                                               const Driver &D) {
-    if (UseRelaxAll(C, Args))
-      CmdArgs.push_back("-mrelax-all");
-
-    // When passing -I arguments to the assembler we sometimes need to
-    // unconditionally take the next argument.  For example, when parsing
-    // '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the
-    // -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo'
-    // arg after parsing the '-I' arg.
-    bool TakeNextArg = false;
-
-    // When using an integrated assembler, translate -Wa, and -Xassembler
-    // options.
-    bool CompressDebugSections = false;
-    for (const Arg *A :
-         Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) {
-      A->claim();
+  if (UseRelaxAll(C, Args))
+    CmdArgs.push_back("-mrelax-all");
+
+  // When passing -I arguments to the assembler we sometimes need to
+  // unconditionally take the next argument.  For example, when parsing
+  // '-Wa,-I -Wa,foo' we need to accept the -Wa,foo arg after seeing the
+  // -Wa,-I arg and when parsing '-Wa,-I,foo' we need to accept the 'foo'
+  // arg after parsing the '-I' arg.
+  bool TakeNextArg = false;
+
+  // When using an integrated assembler, translate -Wa, and -Xassembler
+  // options.
+  bool CompressDebugSections = false;
+  for (const Arg *A :
+       Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) {
+    A->claim();
 
-      for (unsigned i = 0, e = A->getNumValues(); i != e; ++i) {
-        StringRef Value = A->getValue(i);
+      for (const StringRef Value : A->getValues()) {
         if (TakeNextArg) {
           CmdArgs.push_back(Value.data());
           TakeNextArg = false;
           continue;
         }
 
-        if (Value == "-force_cpusubtype_ALL") {
-          // Do nothing, this is the default and we don't support anything else.
-        } else if (Value == "-L") {
-          CmdArgs.push_back("-msave-temp-labels");
-        } else if (Value == "--fatal-warnings") {
-          CmdArgs.push_back("-massembler-fatal-warnings");
-        } else if (Value == "--noexecstack") {
-          CmdArgs.push_back("-mnoexecstack");
-        } else if (Value == "-compress-debug-sections" ||
-                   Value == "--compress-debug-sections") {
-          CompressDebugSections = true;
-        } else if (Value == "-nocompress-debug-sections" ||
-                   Value == "--nocompress-debug-sections") {
-          CompressDebugSections = false;
-        } else if (Value.startswith("-I")) {
-          CmdArgs.push_back(Value.data());
-          // We need to consume the next argument if the current arg is a plain
-          // -I. The next arg will be the include directory.
-          if (Value == "-I")
-            TakeNextArg = true;
-        } else if (Value.startswith("-gdwarf-")) {
-          CmdArgs.push_back(Value.data());
-        } else {
-          D.Diag(diag::err_drv_unsupported_option_argument)
+      if (Value == "-force_cpusubtype_ALL") {
+        // Do nothing, this is the default and we don't support anything else.
+      } else if (Value == "-L") {
+        CmdArgs.push_back("-msave-temp-labels");
+      } else if (Value == "--fatal-warnings") {
+        CmdArgs.push_back("-massembler-fatal-warnings");
+      } else if (Value == "--noexecstack") {
+        CmdArgs.push_back("-mnoexecstack");
+      } else if (Value == "-compress-debug-sections" ||
+                 Value == "--compress-debug-sections") {
+        CompressDebugSections = true;
+      } else if (Value == "-nocompress-debug-sections" ||
+                 Value == "--nocompress-debug-sections") {
+        CompressDebugSections = false;
+      } else if (Value.startswith("-I")) {
+        CmdArgs.push_back(Value.data());
+        // We need to consume the next argument if the current arg is a plain
+        // -I. The next arg will be the include directory.
+        if (Value == "-I")
+          TakeNextArg = true;
+      } else if (Value.startswith("-gdwarf-")) {
+        CmdArgs.push_back(Value.data());
+      } else {
+        D.Diag(diag::err_drv_unsupported_option_argument)
             << A->getOption().getName() << Value;
-        }
       }
     }
-    if (CompressDebugSections) {
-      if (llvm::zlib::isAvailable())
-        CmdArgs.push_back("-compress-debug-sections");
-      else
-        D.Diag(diag::warn_debug_compression_unavailable);
-    }
+  }
+  if (CompressDebugSections) {
+    if (llvm::zlib::isAvailable())
+      CmdArgs.push_back("-compress-debug-sections");
+    else
+      D.Diag(diag::warn_debug_compression_unavailable);
+  }
 }
 
 // Until ARM libraries are build separately, we have them all in one library
 static StringRef getArchNameForCompilerRTLib(const ToolChain &TC) {
-  // FIXME: handle 64-bit
   if (TC.getTriple().isOSWindows() &&
-      !TC.getTriple().isWindowsItaniumEnvironment())
+      !TC.getTriple().isWindowsItaniumEnvironment() &&
+      TC.getArch() == llvm::Triple::x86)
     return "i386";
   if (TC.getArch() == llvm::Triple::arm || TC.getArch() == llvm::Triple::armeb)
     return "arm";
@@ -2254,8 +2251,8 @@ static SmallString<128> getCompilerRTLibDir(const ToolChain &TC) {
   return Res;
 }
 
-static SmallString<128> getCompilerRT(const ToolChain &TC, StringRef Component,
-                                      bool Shared = false) {
+SmallString<128> tools::getCompilerRT(const ToolChain &TC, StringRef Component,
+                                      bool Shared) {
   const char *Env = TC.getTriple().getEnvironment() == llvm::Triple::Android
                         ? "-android"
                         : "";
@@ -2334,15 +2331,15 @@ static OpenMPRuntimeKind getOpenMPRuntime(const ToolChain &TC,
     RuntimeName = A->getValue();
 
   auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName)
-      .Case("libomp", OMPRT_OMP)
-      .Case("libgomp", OMPRT_GOMP)
-      .Case("libiomp5", OMPRT_IOMP5)
-      .Default(OMPRT_Unknown);
+                .Case("libomp", OMPRT_OMP)
+                .Case("libgomp", OMPRT_GOMP)
+                .Case("libiomp5", OMPRT_IOMP5)
+                .Default(OMPRT_Unknown);
 
   if (RT == OMPRT_Unknown) {
     if (A)
       TC.getDriver().Diag(diag::err_drv_unsupported_option_argument)
-        << A->getOption().getName() << A->getValue();
+          << A->getOption().getName() << A->getValue();
     else
       // FIXME: We could use a nicer diagnostic here.
       TC.getDriver().Diag(diag::err_drv_unsupported_opt) << "-fopenmp";
@@ -2537,8 +2534,7 @@ static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs) {
   }
 }
 
-static const char *SplitDebugName(const ArgList &Args,
-                                  const InputInfo &Input) {
+static const char *SplitDebugName(const ArgList &Args, const InputInfo &Input) {
   Arg *FinalOutput = Args.getLastArg(options::OPT_o);
   if (FinalOutput && Args.hasArg(options::OPT_c)) {
     SmallString<128> T(FinalOutput->getValue());
@@ -2555,10 +2551,9 @@ static const char *SplitDebugName(const ArgList &Args,
   }
 }
 
-static void SplitDebugInfo(const ToolChain &TC, Compilation &C,
-                           const Tool &T, const JobAction &JA,
-                           const ArgList &Args, const InputInfo &Output,
-                           const char *OutFile) {
+static void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T,
+                           const JobAction &JA, const ArgList &Args,
+                           const InputInfo &Output, const char *OutFile) {
   ArgStringList ExtractArgs;
   ExtractArgs.push_back("--extract-dwo");
 
@@ -2570,8 +2565,7 @@ static void SplitDebugInfo(const ToolChain &TC, Compilation &C,
   ExtractArgs.push_back(Output.getFilename());
   ExtractArgs.push_back(OutFile);
 
-  const char *Exec =
-    Args.MakeArgString(TC.GetProgramPath("objcopy"));
+  const char *Exec = Args.MakeArgString(TC.GetProgramPath("objcopy"));
 
   // First extract the dwo sections.
   C.addCommand(llvm::make_unique<Command>(JA, T, Exec, ExtractArgs));
@@ -2635,7 +2629,7 @@ static VersionTuple getMSCompatibilityVersion(unsigned Version) {
     return VersionTuple(Version / 100, Version % 100);
 
   unsigned Build = 0, Factor = 1;
-  for ( ; Version > 10000; Version = Version / 10, Factor = Factor * 10)
+  for (; Version > 10000; Version = Version / 10, Factor = Factor * 10)
     Build = Build + (Version % 10) * Factor;
   return VersionTuple(Version / 100, Version % 100, Build);
 }
@@ -2672,7 +2666,7 @@ static void appendUserToPath(SmallVectorImpl<char> &Result) {
     }
   }
 
-  // Fallback to user id.
+// Fallback to user id.
 #ifdef LLVM_ON_UNIX
   std::string UID = llvm::utostr(getuid());
 #else
@@ -2692,7 +2686,7 @@ VersionTuple visualstudio::getMSVCVersion(const Driver *D,
       Args.hasArg(options::OPT_fms_compatibility_version)) {
     const Arg *MSCVersion = Args.getLastArg(options::OPT_fmsc_version);
     const Arg *MSCompatibilityVersion =
-      Args.getLastArg(options::OPT_fms_compatibility_version);
+        Args.getLastArg(options::OPT_fms_compatibility_version);
 
     if (MSCVersion && MSCompatibilityVersion) {
       if (D)
@@ -2730,12 +2724,10 @@ VersionTuple visualstudio::getMSVCVersion(const Driver *D,
 }
 
 void Clang::ConstructJob(Compilation &C, const JobAction &JA,
-                         const InputInfo &Output,
-                         const InputInfoList &Inputs,
-                         const ArgList &Args,
-                         const char *LinkingOutput) const {
-  bool KernelOrKext = Args.hasArg(options::OPT_mkernel,
-                                  options::OPT_fapple_kext);
+                         const InputInfo &Output, const InputInfoList &Inputs,
+                         const ArgList &Args, const char *LinkingOutput) const {
+  bool KernelOrKext =
+      Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
 
@@ -2775,7 +2767,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   // Select the appropriate action.
   RewriteKind rewriteKind = RK_None;
-  
+
   if (isa<AnalyzeJobAction>(JA)) {
     assert(JA.getType() == types::TY_Plist && "Invalid output type.");
     CmdArgs.push_back("-analyze");
@@ -2796,7 +2788,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CollectArgsForIntegratedAssembler(C, Args, CmdArgs, D);
 
     // Also ignore explicit -force_cpusubtype_ALL option.
-    (void) Args.hasArg(options::OPT_force__cpusubtype__ALL);
+    (void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
   } else if (isa<PrecompileJobAction>(JA)) {
     // Use PCH if the user requested it.
     bool UsePCH = D.CCCUsePCH;
@@ -2834,8 +2826,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("-rewrite-objc");
       rewriteKind = RK_Fragile;
     } else {
-      assert(JA.getType() == types::TY_PP_Asm &&
-             "Unexpected output type!");
+      assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!");
     }
 
     // Preserve use-list order by default when emitting bitcode, so that
@@ -2852,7 +2843,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (!C.isForDiagnostics())
     CmdArgs.push_back("-disable-free");
 
-  // Disable the verification pass in -asserts builds.
+// Disable the verification pass in -asserts builds.
 #ifdef NDEBUG
   CmdArgs.push_back("-disable-llvm-verifier");
 #endif
@@ -2885,9 +2876,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
       if (getToolChain().getTriple().getVendor() == llvm::Triple::Apple)
         CmdArgs.push_back("-analyzer-checker=osx");
-      
+
       CmdArgs.push_back("-analyzer-checker=deadcode");
-      
+
       if (types::isCXX(Input.getType()))
         CmdArgs.push_back("-analyzer-checker=cplusplus");
 
@@ -2896,7 +2887,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
           "-analyzer-checker=security.insecureAPI.UncheckedReturn");
       CmdArgs.push_back("-analyzer-checker=security.insecureAPI.getpw");
       CmdArgs.push_back("-analyzer-checker=security.insecureAPI.gets");
-      CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp");      
+      CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mktemp");
       CmdArgs.push_back("-analyzer-checker=security.insecureAPI.mkstemp");
       CmdArgs.push_back("-analyzer-checker=security.insecureAPI.vfork");
     }
@@ -2980,10 +2971,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // used. If the last argument is any flavor of the '-fno-...' arguments,
   // both PIC and PIE are disabled. Any PIE option implicitly enables PIC
   // at the same level.
-  Arg *LastPICArg =Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
-                                 options::OPT_fpic, options::OPT_fno_pic,
-                                 options::OPT_fPIE, options::OPT_fno_PIE,
-                                 options::OPT_fpie, options::OPT_fno_pie);
+  Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
+                                    options::OPT_fpic, options::OPT_fno_pic,
+                                    options::OPT_fPIE, options::OPT_fno_PIE,
+                                    options::OPT_fpie, options::OPT_fno_pie);
   // Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness
   // is forced, then neither PIC nor PIE flags will have no effect.
   if (!getToolChain().isPICDefaultForced()) {
@@ -2992,10 +2983,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) ||
           O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) {
         PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie);
-        PIC = PIE || O.matches(options::OPT_fPIC) ||
-              O.matches(options::OPT_fpic);
-        IsPICLevelTwo = O.matches(options::OPT_fPIE) ||
-                        O.matches(options::OPT_fPIC);
+        PIC =
+            PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic);
+        IsPICLevelTwo =
+            O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC);
       } else {
         PIE = PIC = false;
       }
@@ -3022,7 +3013,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     // uses it, and it isn't even valid on any OS but Darwin.
     if (!getToolChain().getTriple().isOSDarwin())
       D.Diag(diag::err_drv_unsupported_opt_for_target)
-        << A->getSpelling() << getToolChain().getTriple().str();
+          << A->getSpelling() << getToolChain().getTriple().str();
 
     // FIXME: Warn when this flag trumps some other PIC or PIE flag.
 
@@ -3092,7 +3083,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                                options::OPT_freg_struct_return)) {
     if (getToolChain().getArch() != llvm::Triple::x86) {
       D.Diag(diag::err_drv_unsupported_opt_for_target)
-        << A->getSpelling() << getToolChain().getTriple().str();
+          << A->getSpelling() << getToolChain().getTriple().str();
     } else if (A->getOption().matches(options::OPT_fpcc_struct_return)) {
       CmdArgs.push_back("-fpcc-struct-return");
     } else {
@@ -3113,8 +3104,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool OFastEnabled = isOptimizationLevelFast(Args);
   // If -Ofast is the optimization level, then -fstrict-aliasing should be
   // enabled.  This alias option is being used to simplify the hasFlag logic.
-  OptSpecifier StrictAliasingAliasOption = OFastEnabled ? options::OPT_Ofast :
-    options::OPT_fstrict_aliasing;
+  OptSpecifier StrictAliasingAliasOption =
+      OFastEnabled ? options::OPT_Ofast : options::OPT_fstrict_aliasing;
   // We turn strict aliasing off by default if we're in CL mode, since MSVC
   // doesn't do any TBAA.
   bool TBAAOnByDefault = !getToolChain().getDriver().IsCLMode();
@@ -3137,30 +3128,28 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   // If -Ofast is the optimization level, then -ffast-math should be enabled.
   // This alias option is being used to simplify the getLastArg logic.
-  OptSpecifier FastMathAliasOption = OFastEnabled ? options::OPT_Ofast :
-    options::OPT_ffast_math;
-  
+  OptSpecifier FastMathAliasOption =
+      OFastEnabled ? options::OPT_Ofast : options::OPT_ffast_math;
+
   // Handle various floating point optimization flags, mapping them to the
   // appropriate LLVM code generation flags. The pattern for all of these is to
   // default off the codegen optimizations, and if any flag enables them and no
   // flag disables them after the flag enabling them, enable the codegen
   // optimization. This is complicated by several "umbrella" flags.
-  if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
-                               options::OPT_fno_fast_math,
-                               options::OPT_ffinite_math_only,
-                               options::OPT_fno_finite_math_only,
-                               options::OPT_fhonor_infinities,
-                               options::OPT_fno_honor_infinities))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_ffast_math, FastMathAliasOption,
+          options::OPT_fno_fast_math, options::OPT_ffinite_math_only,
+          options::OPT_fno_finite_math_only, options::OPT_fhonor_infinities,
+          options::OPT_fno_honor_infinities))
     if (A->getOption().getID() != options::OPT_fno_fast_math &&
         A->getOption().getID() != options::OPT_fno_finite_math_only &&
         A->getOption().getID() != options::OPT_fhonor_infinities)
       CmdArgs.push_back("-menable-no-infs");
-  if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
-                               options::OPT_fno_fast_math,
-                               options::OPT_ffinite_math_only,
-                               options::OPT_fno_finite_math_only,
-                               options::OPT_fhonor_nans,
-                               options::OPT_fno_honor_nans))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_ffast_math, FastMathAliasOption,
+          options::OPT_fno_fast_math, options::OPT_ffinite_math_only,
+          options::OPT_fno_finite_math_only, options::OPT_fhonor_nans,
+          options::OPT_fno_honor_nans))
     if (A->getOption().getID() != options::OPT_fno_fast_math &&
         A->getOption().getID() != options::OPT_fno_finite_math_only &&
         A->getOption().getID() != options::OPT_fhonor_nans)
@@ -3168,10 +3157,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   // -fmath-errno is the default on some platforms, e.g. BSD-derived OSes.
   bool MathErrno = getToolChain().IsMathErrnoDefault();
-  if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
-                               options::OPT_fno_fast_math,
-                               options::OPT_fmath_errno,
-                               options::OPT_fno_math_errno)) {
+  if (Arg *A =
+          Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
+                          options::OPT_fno_fast_math, options::OPT_fmath_errno,
+                          options::OPT_fno_math_errno)) {
     // Turning on -ffast_math (with either flag) removes the need for MathErrno.
     // However, turning *off* -ffast_math merely restores the toolchain default
     // (which may be false).
@@ -3190,45 +3179,41 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // entire set of LLVM optimizations, so collect them through all the flag
   // madness.
   bool AssociativeMath = false;
-  if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
-                               options::OPT_fno_fast_math,
-                               options::OPT_funsafe_math_optimizations,
-                               options::OPT_fno_unsafe_math_optimizations,
-                               options::OPT_fassociative_math,
-                               options::OPT_fno_associative_math))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_ffast_math, FastMathAliasOption,
+          options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations,
+          options::OPT_fno_unsafe_math_optimizations,
+          options::OPT_fassociative_math, options::OPT_fno_associative_math))
     if (A->getOption().getID() != options::OPT_fno_fast_math &&
         A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
         A->getOption().getID() != options::OPT_fno_associative_math)
       AssociativeMath = true;
   bool ReciprocalMath = false;
-  if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
-                               options::OPT_fno_fast_math,
-                               options::OPT_funsafe_math_optimizations,
-                               options::OPT_fno_unsafe_math_optimizations,
-                               options::OPT_freciprocal_math,
-                               options::OPT_fno_reciprocal_math))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_ffast_math, FastMathAliasOption,
+          options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations,
+          options::OPT_fno_unsafe_math_optimizations,
+          options::OPT_freciprocal_math, options::OPT_fno_reciprocal_math))
     if (A->getOption().getID() != options::OPT_fno_fast_math &&
         A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
         A->getOption().getID() != options::OPT_fno_reciprocal_math)
       ReciprocalMath = true;
   bool SignedZeros = true;
-  if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
-                               options::OPT_fno_fast_math,
-                               options::OPT_funsafe_math_optimizations,
-                               options::OPT_fno_unsafe_math_optimizations,
-                               options::OPT_fsigned_zeros,
-                               options::OPT_fno_signed_zeros))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_ffast_math, FastMathAliasOption,
+          options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations,
+          options::OPT_fno_unsafe_math_optimizations,
+          options::OPT_fsigned_zeros, options::OPT_fno_signed_zeros))
     if (A->getOption().getID() != options::OPT_fno_fast_math &&
         A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
         A->getOption().getID() != options::OPT_fsigned_zeros)
       SignedZeros = false;
   bool TrappingMath = true;
-  if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
-                               options::OPT_fno_fast_math,
-                               options::OPT_funsafe_math_optimizations,
-                               options::OPT_fno_unsafe_math_optimizations,
-                               options::OPT_ftrapping_math,
-                               options::OPT_fno_trapping_math))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_ffast_math, FastMathAliasOption,
+          options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations,
+          options::OPT_fno_unsafe_math_optimizations,
+          options::OPT_ftrapping_math, options::OPT_fno_trapping_math))
     if (A->getOption().getID() != options::OPT_fno_fast_math &&
         A->getOption().getID() != options::OPT_fno_unsafe_math_optimizations &&
         A->getOption().getID() != options::OPT_ftrapping_math)
@@ -3243,7 +3228,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (ReciprocalMath)
     CmdArgs.push_back("-freciprocal-math");
 
-  // Validate and pass through -fp-contract option. 
+  // Validate and pass through -fp-contract option.
   if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
                                options::OPT_fno_fast_math,
                                options::OPT_ffp_contract)) {
@@ -3253,7 +3238,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
         CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + Val));
       } else {
         D.Diag(diag::err_drv_unsupported_option_argument)
-          << A->getOption().getName() << Val;
+            << A->getOption().getName() << Val;
       }
     } else if (A->getOption().matches(options::OPT_ffast_math) ||
                (OFastEnabled && A->getOption().matches(options::OPT_Ofast))) {
@@ -3261,7 +3246,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
     }
   }
-  
+
   ParseMRecip(getToolChain().getDriver(), Args, CmdArgs);
 
   // We separately look for the '-ffast-math' and '-ffinite-math-only' flags,
@@ -3271,8 +3256,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // and deserialization, etc.
   if (Arg *A = Args.getLastArg(options::OPT_ffast_math, FastMathAliasOption,
                                options::OPT_fno_fast_math))
-      if (!A->getOption().matches(options::OPT_fno_fast_math))
-        CmdArgs.push_back("-ffast-math");
+    if (!A->getOption().matches(options::OPT_fno_fast_math))
+      CmdArgs.push_back("-ffast-math");
   if (Arg *A = Args.getLastArg(options::OPT_ffinite_math_only,
                                options::OPT_fno_fast_math))
     if (A->getOption().matches(options::OPT_ffinite_math_only))
@@ -3335,7 +3320,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   }
 
   // FIXME: Handle -mtune=.
-  (void) Args.hasArg(options::OPT_mtune_EQ);
+  (void)Args.hasArg(options::OPT_mtune_EQ);
 
   if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
     CmdArgs.push_back("-mcode-model");
@@ -3358,7 +3343,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   getTargetFeatures(D, Triple, Args, CmdArgs, false);
 
   // Add target specific flags.
-  switch(getToolChain().getArch()) {
+  switch (getToolChain().getArch()) {
   default:
     break;
 
@@ -3421,13 +3406,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   types::ID InputType = Input.getType();
   if (!Args.hasArg(options::OPT_fallow_unsupported)) {
     Arg *Unsupported;
-    if (types::isCXX(InputType) &&
-        getToolChain().getTriple().isOSDarwin() &&
+    if (types::isCXX(InputType) && getToolChain().getTriple().isOSDarwin() &&
         getToolChain().getArch() == llvm::Triple::x86) {
       if ((Unsupported = Args.getLastArg(options::OPT_fapple_kext)) ||
           (Unsupported = Args.getLastArg(options::OPT_mkernel)))
         D.Diag(diag::err_drv_clang_unsupported_opt_cxx_darwin_i386)
-          << Unsupported->getOption().getName();
+            << Unsupported->getOption().getName();
     }
   }
 
@@ -3435,16 +3419,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddLastArg(CmdArgs, options::OPT_H);
   if (D.CCPrintHeaders && !D.CCGenDiagnostics) {
     CmdArgs.push_back("-header-include-file");
-    CmdArgs.push_back(D.CCPrintHeadersFilename ?
-                      D.CCPrintHeadersFilename : "-");
+    CmdArgs.push_back(D.CCPrintHeadersFilename ? D.CCPrintHeadersFilename
+                                               : "-");
   }
   Args.AddLastArg(CmdArgs, options::OPT_P);
   Args.AddLastArg(CmdArgs, options::OPT_print_ivar_layout);
 
   if (D.CCLogDiagnostics && !D.CCGenDiagnostics) {
     CmdArgs.push_back("-diagnostic-log-file");
-    CmdArgs.push_back(D.CCLogDiagnosticsFilename ?
-                      D.CCLogDiagnosticsFilename : "-");
+    CmdArgs.push_back(D.CCLogDiagnosticsFilename ? D.CCLogDiagnosticsFilename
+                                                 : "-");
   }
 
   // Use the last option from "-g" group. "-gline-tables-only" and "-gdwarf-x"
@@ -3525,8 +3509,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-ffunction-sections");
   }
 
-  if (Args.hasFlag(options::OPT_fdata_sections,
-                   options::OPT_fno_data_sections, UseSeparateSections)) {
+  if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections,
+                   UseSeparateSections)) {
     CmdArgs.push_back("-fdata-sections");
   }
 
@@ -3541,7 +3525,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       (Args.hasArg(options::OPT_fprofile_instr_use) ||
        Args.hasArg(options::OPT_fprofile_instr_use_EQ)))
     D.Diag(diag::err_drv_argument_not_allowed_with)
-      << "-fprofile-instr-generate" << "-fprofile-instr-use";
+        << "-fprofile-instr-generate"
+        << "-fprofile-instr-use";
 
   if (Arg *A = Args.getLastArg(options::OPT_fprofile_instr_generate_EQ))
     A->render(Args, CmdArgs);
@@ -3565,7 +3550,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       !(Args.hasArg(options::OPT_fprofile_instr_generate) ||
         Args.hasArg(options::OPT_fprofile_instr_generate_EQ)))
     D.Diag(diag::err_drv_argument_only_allowed_with)
-      << "-fcoverage-mapping" << "-fprofile-instr-generate";
+        << "-fcoverage-mapping"
+        << "-fprofile-instr-generate";
 
   if (Args.hasArg(options::OPT_fcoverage_mapping))
     CmdArgs.push_back("-fcoverage-mapping");
@@ -3597,7 +3583,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-nobuiltininc");
   } else {
     if (Args.hasArg(options::OPT_nostdlibinc))
-        CmdArgs.push_back("-nostdsysteminc");
+      CmdArgs.push_back("-nostdsysteminc");
     Args.AddLastArg(CmdArgs, options::OPT_nostdincxx);
     Args.AddLastArg(CmdArgs, options::OPT_nobuiltininc);
   }
@@ -3641,8 +3627,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) {
     if (ARCMTEnabled) {
-      D.Diag(diag::err_drv_argument_not_allowed_with)
-        << A->getAsString(Args) << "-ccc-arcmt-migrate";
+      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
+                                                      << "-ccc-arcmt-migrate";
     }
     CmdArgs.push_back("-mt-migrate-directory");
     CmdArgs.push_back(A->getValue());
@@ -3745,8 +3731,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     // eventually we want to do all the standard defaulting here instead of
     // splitting it between the driver and clang -cc1.
     if (!types::isCXX(InputType))
-      Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ,
-                                "-std=", /*Joined=*/true);
+      Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ, "-std=",
+                                /*Joined=*/true);
     else if (IsWindowsMSVC)
       ImplyVCPPCXXVer = true;
 
@@ -3835,7 +3821,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value
   }
 
-
   if (Args.hasArg(options::OPT_relocatable_pch))
     CmdArgs.push_back("-relocatable-pch");
 
@@ -3917,8 +3902,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.AddLastArg(CmdArgs, options::OPT_fno_standalone_debug);
   Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names);
   // AltiVec language extensions aren't relevant for assembling.
-  if (!isa<PreprocessJobAction>(JA) || 
-      Output.getType() != types::TY_PP_Asm)
+  if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm)
     Args.AddLastArg(CmdArgs, options::OPT_faltivec);
   Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
   Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);
@@ -3943,7 +3927,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
 
   const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
-  Sanitize.addArgs(Args, CmdArgs);
+  Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType);
 
   // Report an error for -faltivec on anything other than PowerPC.
   if (const Arg *A = Args.getLastArg(options::OPT_faltivec)) {
@@ -3980,8 +3964,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   // -fno-strict-overflow implies -fwrapv if it isn't disabled, but
   // -fstrict-overflow won't turn off an explicitly enabled -fwrapv.
-  if (Arg *A = Args.getLastArg(options::OPT_fwrapv,
-                               options::OPT_fno_wrapv)) {
+  if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) {
     if (A->getOption().matches(options::OPT_fwrapv))
       CmdArgs.push_back("-fwrapv");
   } else if (Arg *A = Args.getLastArg(options::OPT_fstrict_overflow,
@@ -4001,7 +3984,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   Args.AddLastArg(CmdArgs, options::OPT_pthread);
 
-
   // -stack-protector=0 is default.
   unsigned StackProtectorLevel = 0;
   if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
@@ -4010,19 +3992,20 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     Args.ClaimAllArgs(options::OPT_fstack_protector_strong);
     Args.ClaimAllArgs(options::OPT_fstack_protector);
   } else if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
-                               options::OPT_fstack_protector_all,
-                               options::OPT_fstack_protector_strong,
-                               options::OPT_fstack_protector)) {
+                                      options::OPT_fstack_protector_all,
+                                      options::OPT_fstack_protector_strong,
+                                      options::OPT_fstack_protector)) {
     if (A->getOption().matches(options::OPT_fstack_protector)) {
-      StackProtectorLevel = std::max<unsigned>(LangOptions::SSPOn,
-        getToolChain().GetDefaultStackProtectorLevel(KernelOrKext));
+      StackProtectorLevel = std::max<unsigned>(
+          LangOptions::SSPOn,
+          getToolChain().GetDefaultStackProtectorLevel(KernelOrKext));
     } else if (A->getOption().matches(options::OPT_fstack_protector_strong))
       StackProtectorLevel = LangOptions::SSPStrong;
     else if (A->getOption().matches(options::OPT_fstack_protector_all))
       StackProtectorLevel = LangOptions::SSPReq;
   } else {
     StackProtectorLevel =
-      getToolChain().GetDefaultStackProtectorLevel(KernelOrKext);
+        getToolChain().GetDefaultStackProtectorLevel(KernelOrKext);
   }
   if (StackProtectorLevel) {
     CmdArgs.push_back("-stack-protector");
@@ -4049,7 +4032,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-force-align-stack");
   }
   if (!Args.hasFlag(options::OPT_mno_stackrealign, options::OPT_mstackrealign,
-                   false)) {
+                    false)) {
     CmdArgs.push_back(Args.MakeArgString("-mstackrealign"));
   }
 
@@ -4126,12 +4109,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // -fblocks=0 is default.
   if (Args.hasFlag(options::OPT_fblocks, options::OPT_fno_blocks,
                    getToolChain().IsBlocksDefault()) ||
-        (Args.hasArg(options::OPT_fgnu_runtime) &&
-         Args.hasArg(options::OPT_fobjc_nonfragile_abi) &&
-         !Args.hasArg(options::OPT_fno_blocks))) {
+      (Args.hasArg(options::OPT_fgnu_runtime) &&
+       Args.hasArg(options::OPT_fobjc_nonfragile_abi) &&
+       !Args.hasArg(options::OPT_fno_blocks))) {
     CmdArgs.push_back("-fblocks");
 
-    if (!Args.hasArg(options::OPT_fgnu_runtime) && 
+    if (!Args.hasArg(options::OPT_fgnu_runtime) &&
         !getToolChain().hasBlocksRuntime())
       CmdArgs.push_back("-fblocks-runtime-optional");
   }
@@ -4141,9 +4124,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // C++/Objective-C++ programs.
   bool HaveModules = false;
   if (Args.hasFlag(options::OPT_fmodules, options::OPT_fno_modules, false)) {
-    bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules, 
-                                     options::OPT_fno_cxx_modules, 
-                                     true);
+    bool AllowedInCXX = Args.hasFlag(options::OPT_fcxx_modules,
+                                     options::OPT_fno_cxx_modules, true);
     if (AllowedInCXX || !types::isCXX(InputType)) {
       CmdArgs.push_back("-fmodules");
       HaveModules = true;
@@ -4160,16 +4142,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // -fmodules-decluse checks that modules used are declared so (off by
   // default).
   if (Args.hasFlag(options::OPT_fmodules_decluse,
-                   options::OPT_fno_modules_decluse,
-                   false)) {
+                   options::OPT_fno_modules_decluse, false)) {
     CmdArgs.push_back("-fmodules-decluse");
   }
 
   // -fmodules-strict-decluse is like -fmodule-decluse, but also checks that
   // all #included headers are part of modules.
   if (Args.hasFlag(options::OPT_fmodules_strict_decluse,
-                   options::OPT_fno_modules_strict_decluse,
-                   false)) {
+                   options::OPT_fno_modules_strict_decluse, false)) {
     CmdArgs.push_back("-fmodules-strict-decluse");
   }
 
@@ -4264,14 +4244,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   // -faccess-control is default.
   if (Args.hasFlag(options::OPT_fno_access_control,
-                   options::OPT_faccess_control,
-                   false))
+                   options::OPT_faccess_control, false))
     CmdArgs.push_back("-fno-access-control");
 
   // -felide-constructors is the default.
   if (Args.hasFlag(options::OPT_fno_elide_constructors,
-                   options::OPT_felide_constructors,
-                   false))
+                   options::OPT_felide_constructors, false))
     CmdArgs.push_back("-fno-elide-constructors");
 
   ToolChain::RTTIMode RTTIMode = getToolChain().getRTTIMode();
@@ -4282,10 +4260,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fno-rtti");
 
   // -fshort-enums=0 is default for all architectures except Hexagon.
-  if (Args.hasFlag(options::OPT_fshort_enums,
-                   options::OPT_fno_short_enums,
-                   getToolChain().getArch() ==
-                   llvm::Triple::hexagon))
+  if (Args.hasFlag(options::OPT_fshort_enums, options::OPT_fno_short_enums,
+                   getToolChain().getArch() == llvm::Triple::hexagon))
     CmdArgs.push_back("-fshort-enums");
 
   // -fsigned-char is default.
@@ -4304,8 +4280,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasFlag(options::OPT_fuse_cxa_atexit,
                     options::OPT_fno_use_cxa_atexit,
                     !IsWindowsCygnus && !IsWindowsGNU &&
-                    getToolChain().getArch() != llvm::Triple::hexagon &&
-                    getToolChain().getArch() != llvm::Triple::xcore) ||
+                        getToolChain().getArch() != llvm::Triple::hexagon &&
+                        getToolChain().getArch() != llvm::Triple::xcore) ||
       KernelOrKext)
     CmdArgs.push_back("-fno-use-cxa-atexit");
 
@@ -4320,11 +4296,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fuse-line-directives");
 
   // -fms-compatibility=0 is default.
-  if (Args.hasFlag(options::OPT_fms_compatibility, 
+  if (Args.hasFlag(options::OPT_fms_compatibility,
                    options::OPT_fno_ms_compatibility,
-                   (IsWindowsMSVC && Args.hasFlag(options::OPT_fms_extensions,
-                                                  options::OPT_fno_ms_extensions,
-                                                  true))))
+                   (IsWindowsMSVC &&
+                    Args.hasFlag(options::OPT_fms_extensions,
+                                 options::OPT_fno_ms_extensions, true))))
     CmdArgs.push_back("-fms-compatibility");
 
   // -fms-compatibility-version=18.00 is default.
@@ -4366,8 +4342,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                                options::OPT_fno_gnu_keywords))
     A->render(Args, CmdArgs);
 
-  if (Args.hasFlag(options::OPT_fgnu89_inline,
-                   options::OPT_fno_gnu89_inline,
+  if (Args.hasFlag(options::OPT_fgnu89_inline, options::OPT_fno_gnu89_inline,
                    false))
     CmdArgs.push_back("-fgnu89-inline");
 
@@ -4387,7 +4362,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch,
                       options::OPT_fno_objc_legacy_dispatch,
                       objcRuntime.isLegacyDispatchDefaultForArch(
-                        getToolChain().getArch()))) {
+                          getToolChain().getArch()))) {
       if (getToolChain().UseObjCMixedDispatch())
         CmdArgs.push_back("-fobjc-dispatch-method=mixed");
       else
@@ -4404,12 +4379,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       objcRuntime.getKind() == ObjCRuntime::FragileMacOSX &&
       objcRuntime.isNeXTFamily())
     CmdArgs.push_back("-fobjc-subscripting-legacy-runtime");
-  
+
   // -fencode-extended-block-signature=1 is default.
   if (getToolChain().IsEncodeExtendedBlockSignatureDefault()) {
     CmdArgs.push_back("-fencode-extended-block-signature");
   }
-  
+
   // Allow -fno-objc-arr to trump -fobjc-arr/-fobjc-arc.
   // NOTE: This logic is duplicated in ToolChains.cpp.
   bool ARC = isObjCAutoRefCount(Args);
@@ -4448,14 +4423,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     GCArg = Args.getLastArg(options::OPT_fobjc_gc);
   if (GCArg) {
     if (ARC) {
-      D.Diag(diag::err_drv_objc_gc_arr)
-        << GCArg->getAsString(Args);
+      D.Diag(diag::err_drv_objc_gc_arr) << GCArg->getAsString(Args);
     } else if (getToolChain().SupportsObjCGC()) {
       GCArg->render(Args, CmdArgs);
     } else {
       // FIXME: We should move this to a hard error.
-      D.Diag(diag::warn_drv_objc_gc_unsupported)
-        << GCArg->getAsString(Args);
+      D.Diag(diag::warn_drv_objc_gc_unsupported) << GCArg->getAsString(Args);
     }
   }
 
@@ -4465,8 +4438,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   // Handle GCC-style exception args.
   if (!C.getDriver().IsCLMode())
-    addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext,
-                     objcRuntime, CmdArgs);
+    addExceptionArgs(Args, InputType, getToolChain(), KernelOrKext, objcRuntime,
+                     CmdArgs);
 
   if (getToolChain().UseSjLjExceptions())
     CmdArgs.push_back("-fsjlj-exceptions");
@@ -4498,8 +4471,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   // -fno-pascal-strings is default, only pass non-default.
   if (Args.hasFlag(options::OPT_fpascal_strings,
-                   options::OPT_fno_pascal_strings,
-                   false))
+                   options::OPT_fno_pascal_strings, false))
     CmdArgs.push_back("-fpascal-strings");
 
   // Honor -fpack-struct= and -fpack-struct, if given. Note that
@@ -4543,28 +4515,27 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasFlag(options::OPT_fsigned_bitfields,
                     options::OPT_funsigned_bitfields))
     D.Diag(diag::warn_drv_clang_unsupported)
-      << Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args);
+        << Args.getLastArg(options::OPT_funsigned_bitfields)->getAsString(Args);
 
   // -fsigned-bitfields is default, and clang doesn't support -fno-for-scope.
-  if (!Args.hasFlag(options::OPT_ffor_scope,
-                    options::OPT_fno_for_scope))
+  if (!Args.hasFlag(options::OPT_ffor_scope, options::OPT_fno_for_scope))
     D.Diag(diag::err_drv_clang_unsupported)
-      << Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args);
+        << Args.getLastArg(options::OPT_fno_for_scope)->getAsString(Args);
 
   // -finput_charset=UTF-8 is default. Reject others
-  if (Arg *inputCharset = Args.getLastArg(
-          options::OPT_finput_charset_EQ)) {
-      StringRef value = inputCharset->getValue();
-      if (value != "UTF-8")
-          D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args) << value;
+  if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) {
+    StringRef value = inputCharset->getValue();
+    if (value != "UTF-8")
+      D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args)
+                                          << value;
   }
 
   // -fexec_charset=UTF-8 is default. Reject others
-  if (Arg *execCharset = Args.getLastArg(
-          options::OPT_fexec_charset_EQ)) {
-      StringRef value = execCharset->getValue();
-      if (value != "UTF-8")
-          D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) << value;
+  if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
+    StringRef value = execCharset->getValue();
+    if (value != "UTF-8")
+      D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
+                                          << value;
   }
 
   // -fcaret-diagnostics is default.
@@ -4583,22 +4554,21 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fdiagnostics-show-option");
 
   if (const Arg *A =
-        Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) {
+          Args.getLastArg(options::OPT_fdiagnostics_show_category_EQ)) {
     CmdArgs.push_back("-fdiagnostics-show-category");
     CmdArgs.push_back(A->getValue());
   }
 
-  if (const Arg *A =
-        Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
+  if (const Arg *A = Args.getLastArg(options::OPT_fdiagnostics_format_EQ)) {
     CmdArgs.push_back("-fdiagnostics-format");
     CmdArgs.push_back(A->getValue());
   }
 
   if (Arg *A = Args.getLastArg(
-      options::OPT_fdiagnostics_show_note_include_stack,
-      options::OPT_fno_diagnostics_show_note_include_stack)) {
+          options::OPT_fdiagnostics_show_note_include_stack,
+          options::OPT_fno_diagnostics_show_note_include_stack)) {
     if (A->getOption().matches(
-        options::OPT_fdiagnostics_show_note_include_stack))
+            options::OPT_fdiagnostics_show_note_include_stack))
       CmdArgs.push_back("-fdiagnostics-show-note-include-stack");
     else
       CmdArgs.push_back("-fno-diagnostics-show-note-include-stack");
@@ -4636,7 +4606,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
         ShowColors = Colors_Auto;
       else
         getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
-          << ("-fdiagnostics-color=" + value).str();
+            << ("-fdiagnostics-color=" + value).str();
     }
   }
   if (ShowColors == Colors_On ||
@@ -4650,8 +4620,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                     options::OPT_fno_show_source_location))
     CmdArgs.push_back("-fno-show-source-location");
 
-  if (!Args.hasFlag(options::OPT_fshow_column,
-                    options::OPT_fno_show_column,
+  if (!Args.hasFlag(options::OPT_fshow_column, options::OPT_fno_show_column,
                     true))
     CmdArgs.push_back("-fno-show-column");
 
@@ -4659,7 +4628,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                     options::OPT_fno_spell_checking))
     CmdArgs.push_back("-fno-spell-checking");
 
-
   // -fno-asm-blocks is default.
   if (Args.hasFlag(options::OPT_fasm_blocks, options::OPT_fno_asm_blocks,
                    false))
@@ -4674,16 +4642,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // selected. For optimization levels that want vectorization we use the alias
   // option to simplify the hasFlag logic.
   bool EnableVec = shouldEnableVectorizerAtOLevel(Args, false);
-  OptSpecifier VectorizeAliasOption = EnableVec ? options::OPT_O_Group :
-    options::OPT_fvectorize;
+  OptSpecifier VectorizeAliasOption =
+      EnableVec ? options::OPT_O_Group : options::OPT_fvectorize;
   if (Args.hasFlag(options::OPT_fvectorize, VectorizeAliasOption,
                    options::OPT_fno_vectorize, EnableVec))
     CmdArgs.push_back("-vectorize-loops");
 
   // -fslp-vectorize is enabled based on the optimization level selected.
   bool EnableSLPVec = shouldEnableVectorizerAtOLevel(Args, true);
-  OptSpecifier SLPVectAliasOption = EnableSLPVec ? options::OPT_O_Group :
-    options::OPT_fslp_vectorize;
+  OptSpecifier SLPVectAliasOption =
+      EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize;
   if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption,
                    options::OPT_fno_slp_vectorize, EnableSLPVec))
     CmdArgs.push_back("-vectorize-slp");
@@ -4718,16 +4686,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                    options::OPT_fno_apple_pragma_pack, false))
     CmdArgs.push_back("-fapple-pragma-pack");
 
-  // le32-specific flags: 
+  // le32-specific flags:
   //  -fno-math-builtin: clang should not convert math builtins to intrinsics
   //                     by default.
   if (getToolChain().getArch() == llvm::Triple::le32) {
     CmdArgs.push_back("-fno-math-builtin");
   }
 
-  // Default to -fno-builtin-str{cat,cpy} on Darwin for ARM.
-  //
-  // FIXME: This is disabled until clang -cc1 supports -fno-builtin-foo. PR4941.
+// Default to -fno-builtin-str{cat,cpy} on Darwin for ARM.
+//
+// FIXME: This is disabled until clang -cc1 supports -fno-builtin-foo. PR4941.
 #if 0
   if (getToolChain().getTriple().isOSDarwin() &&
       (getToolChain().getArch() == llvm::Triple::arm ||
@@ -4759,7 +4727,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   Args.AddLastArg(CmdArgs, options::OPT_dM);
   Args.AddLastArg(CmdArgs, options::OPT_dD);
-  
+
   // Handle serialized diagnostics.
   if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
     CmdArgs.push_back("-serialize-diagnostic-file");
@@ -4828,9 +4796,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
     SmallString<256> Flags;
     Flags += Exec;
-    for (unsigned i = 0, e = OriginalArgs.size(); i != e; ++i) {
+    for (const char *OriginalArg : OriginalArgs) {
       SmallString<128> EscapedArg;
-      EscapeSpacesAndBackslashes(OriginalArgs[i], EscapedArg);
+      EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
       Flags += " ";
       Flags += EscapedArg;
     }
@@ -4841,9 +4809,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // Add the split debug info name to the command lines here so we
   // can propagate it to the backend.
   bool SplitDwarf = Args.hasArg(options::OPT_gsplit_dwarf) &&
-    getToolChain().getTriple().isOSLinux() &&
-    (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
-     isa<BackendJobAction>(JA));
+                    getToolChain().getTriple().isOSLinux() &&
+                    (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
+                     isa<BackendJobAction>(JA));
   const char *SplitDwarfOut;
   if (SplitDwarf) {
     CmdArgs.push_back("-split-dwarf-file");
@@ -4863,7 +4831,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
   }
 
-
   // Handle the debug info splitting at object creation time if we're
   // creating an object.
   // TODO: Currently only works on linux with newer objcopy.
@@ -4872,8 +4839,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (Arg *A = Args.getLastArg(options::OPT_pg))
     if (Args.hasArg(options::OPT_fomit_frame_pointer))
-      D.Diag(diag::err_drv_argument_not_allowed_with)
-        << "-fomit-frame-pointer" << A->getAsString(Args);
+      D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer"
+                                                      << A->getAsString(Args);
 
   // Claim some arguments which clang supports automatically.
 
@@ -4898,9 +4865,9 @@ ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
                                       ArgStringList &cmdArgs,
                                       RewriteKind rewriteKind) const {
   // Look for the controlling runtime option.
-  Arg *runtimeArg = args.getLastArg(options::OPT_fnext_runtime,
-                                    options::OPT_fgnu_runtime,
-                                    options::OPT_fobjc_runtime_EQ);
+  Arg *runtimeArg =
+      args.getLastArg(options::OPT_fnext_runtime, options::OPT_fgnu_runtime,
+                      options::OPT_fobjc_runtime_EQ);
 
   // Just forward -fobjc-runtime= to the frontend.  This supercedes
   // options about fragility.
@@ -4910,7 +4877,7 @@ ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
     StringRef value = runtimeArg->getValue();
     if (runtime.tryParse(value)) {
       getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime)
-        << value;
+          << value;
     }
 
     runtimeArg->render(args, cmdArgs);
@@ -4933,26 +4900,25 @@ ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
     else if (value == "3")
       objcABIVersion = 3;
     else
-      getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
-        << value;
+      getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) << value;
   } else {
     // Otherwise, determine if we are using the non-fragile ABI.
-    bool nonFragileABIIsDefault = 
-      (rewriteKind == RK_NonFragile || 
-       (rewriteKind == RK_None &&
-        getToolChain().IsObjCNonFragileABIDefault()));
+    bool nonFragileABIIsDefault =
+        (rewriteKind == RK_NonFragile ||
+         (rewriteKind == RK_None &&
+          getToolChain().IsObjCNonFragileABIDefault()));
     if (args.hasFlag(options::OPT_fobjc_nonfragile_abi,
                      options::OPT_fno_objc_nonfragile_abi,
                      nonFragileABIIsDefault)) {
-      // Determine the non-fragile ABI version to use.
+// Determine the non-fragile ABI version to use.
 #ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO
       unsigned nonFragileABIVersion = 1;
 #else
       unsigned nonFragileABIVersion = 2;
 #endif
 
-      if (Arg *abiArg = args.getLastArg(
-            options::OPT_fobjc_nonfragile_abi_version_EQ)) {
+      if (Arg *abiArg =
+              args.getLastArg(options::OPT_fobjc_nonfragile_abi_version_EQ)) {
         StringRef value = abiArg->getValue();
         if (value == "1")
           nonFragileABIVersion = 1;
@@ -4960,7 +4926,7 @@ ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
           nonFragileABIVersion = 2;
         else
           getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported)
-            << value;
+              << value;
       }
 
       objcABIVersion = 1 + nonFragileABIVersion;
@@ -4989,30 +4955,30 @@ ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args,
       break;
     }
 
-  // -fnext-runtime
+    // -fnext-runtime
   } else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) {
     // On Darwin, make this use the default behavior for the toolchain.
     if (getToolChain().getTriple().isOSDarwin()) {
       runtime = getToolChain().getDefaultObjCRuntime(isNonFragile);
 
-    // Otherwise, build for a generic macosx port.
+      // Otherwise, build for a generic macosx port.
     } else {
       runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple());
     }
 
-  // -fgnu-runtime
+    // -fgnu-runtime
   } else {
     assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime));
     // Legacy behaviour is to target the gnustep runtime if we are i
     // non-fragile mode or the GCC runtime in fragile mode.
     if (isNonFragile)
-      runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(1,6));
+      runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple(1, 6));
     else
       runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple());
   }
 
-  cmdArgs.push_back(args.MakeArgString(
-                                 "-fobjc-runtime=" + runtime.getAsString()));
+  cmdArgs.push_back(
+      args.MakeArgString("-fobjc-runtime=" + runtime.getAsString()));
   return runtime;
 }
 
@@ -5039,14 +5005,22 @@ struct EHFlags {
 /// The default is /EHs-c-, meaning cleanups are disabled.
 static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
   EHFlags EH;
-  std::vector<std::string> EHArgs = Args.getAllArgValues(options::OPT__SLASH_EH);
+  std::vector<std::string> EHArgs =
+      Args.getAllArgValues(options::OPT__SLASH_EH);
   for (auto EHVal : EHArgs) {
     for (size_t I = 0, E = EHVal.size(); I != E; ++I) {
       switch (EHVal[I]) {
-      case 'a': EH.Asynch = maybeConsumeDash(EHVal, I); continue;
-      case 'c': EH.NoExceptC = maybeConsumeDash(EHVal, I); continue;
-      case 's': EH.Synch = maybeConsumeDash(EHVal, I); continue;
-      default: break;
+      case 'a':
+        EH.Asynch = maybeConsumeDash(EHVal, I);
+        continue;
+      case 'c':
+        EH.NoExceptC = maybeConsumeDash(EHVal, I);
+        continue;
+      case 's':
+        EH.Synch = maybeConsumeDash(EHVal, I);
+        continue;
+      default:
+        break;
       }
       D.Diag(clang::diag::err_drv_invalid_value) << "/EH" << EHVal;
       break;
@@ -5066,33 +5040,33 @@ void Clang::AddClangCLArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
   if (Arg *A = Args.getLastArg(options::OPT__SLASH_M_Group))
     RTOptionID = A->getOption().getID();
 
-  switch(RTOptionID) {
-    case options::OPT__SLASH_MD:
-      if (Args.hasArg(options::OPT__SLASH_LDd))
-        CmdArgs.push_back("-D_DEBUG");
-      CmdArgs.push_back("-D_MT");
-      CmdArgs.push_back("-D_DLL");
-      CmdArgs.push_back("--dependent-lib=msvcrt");
-      break;
-    case options::OPT__SLASH_MDd:
+  switch (RTOptionID) {
+  case options::OPT__SLASH_MD:
+    if (Args.hasArg(options::OPT__SLASH_LDd))
       CmdArgs.push_back("-D_DEBUG");
-      CmdArgs.push_back("-D_MT");
-      CmdArgs.push_back("-D_DLL");
-      CmdArgs.push_back("--dependent-lib=msvcrtd");
-      break;
-    case options::OPT__SLASH_MT:
-      if (Args.hasArg(options::OPT__SLASH_LDd))
-        CmdArgs.push_back("-D_DEBUG");
-      CmdArgs.push_back("-D_MT");
-      CmdArgs.push_back("--dependent-lib=libcmt");
-      break;
-    case options::OPT__SLASH_MTd:
+    CmdArgs.push_back("-D_MT");
+    CmdArgs.push_back("-D_DLL");
+    CmdArgs.push_back("--dependent-lib=msvcrt");
+    break;
+  case options::OPT__SLASH_MDd:
+    CmdArgs.push_back("-D_DEBUG");
+    CmdArgs.push_back("-D_MT");
+    CmdArgs.push_back("-D_DLL");
+    CmdArgs.push_back("--dependent-lib=msvcrtd");
+    break;
+  case options::OPT__SLASH_MT:
+    if (Args.hasArg(options::OPT__SLASH_LDd))
       CmdArgs.push_back("-D_DEBUG");
-      CmdArgs.push_back("-D_MT");
-      CmdArgs.push_back("--dependent-lib=libcmtd");
-      break;
-    default:
-      llvm_unreachable("Unexpected option ID.");
+    CmdArgs.push_back("-D_MT");
+    CmdArgs.push_back("--dependent-lib=libcmt");
+    break;
+  case options::OPT__SLASH_MTd:
+    CmdArgs.push_back("-D_DEBUG");
+    CmdArgs.push_back("-D_MT");
+    CmdArgs.push_back("--dependent-lib=libcmtd");
+    break;
+  default:
+    llvm_unreachable("Unexpected option ID.");
   }
 
   // This provides POSIX compatibility (maps 'open' to '_open'), which most
@@ -5176,9 +5150,9 @@ void Clang::AddClangCLArgs(const ArgList &Args, ArgStringList &CmdArgs) const {
   }
 }
 
-visualstudio::Compile *Clang::getCLFallback() const {
+visualstudio::Compiler *Clang::getCLFallback() const {
   if (!CLFallback)
-    CLFallback.reset(new visualstudio::Compile(getToolChain()));
+    CLFallback.reset(new visualstudio::Compiler(getToolChain()));
   return CLFallback.get();
 }
 
@@ -5194,8 +5168,7 @@ void ClangAs::AddMIPSTargetArgs(const ArgList &Args,
 }
 
 void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
-                           const InputInfo &Output,
-                           const InputInfoList &Inputs,
+                           const InputInfo &Output, const InputInfoList &Inputs,
                            const ArgList &Args,
                            const char *LinkingOutput) const {
   ArgStringList CmdArgs;
@@ -5217,8 +5190,8 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
 
   // Add the "effective" target triple.
   CmdArgs.push_back("-triple");
-  std::string TripleStr = 
-    getToolChain().ComputeEffectiveClangTriple(Args, Input.getType());
+  std::string TripleStr =
+      getToolChain().ComputeEffectiveClangTriple(Args, Input.getType());
   CmdArgs.push_back(Args.MakeArgString(TripleStr));
 
   // Set the output mode, we currently only expect to be used as a real
@@ -5244,7 +5217,10 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   getTargetFeatures(D, Triple, Args, CmdArgs, true);
 
   // Ignore explicit -force_cpusubtype_ALL option.
-  (void) Args.hasArg(options::OPT_force__cpusubtype__ALL);
+  (void)Args.hasArg(options::OPT_force__cpusubtype__ALL);
+
+  // Pass along any -I options so we get proper .include search paths.
+  Args.AddAllArgs(CmdArgs, options::OPT_I_Group);
 
   // Determine the original source input.
   const Action *SourceAction = &JA;
@@ -5288,9 +5264,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
     SmallString<256> Flags;
     const char *Exec = getToolChain().getDriver().getClangProgramPath();
     Flags += Exec;
-    for (unsigned i = 0, e = OriginalArgs.size(); i != e; ++i) {
+    for (const char *OriginalArg : OriginalArgs) {
       SmallString<128> EscapedArg;
-      EscapeSpacesAndBackslashes(OriginalArgs[i], EscapedArg);
+      EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
       Flags += " ";
       Flags += EscapedArg;
     }
@@ -5301,7 +5277,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
   // FIXME: Add -static support, once we have it.
 
   // Add target specific flags.
-  switch(getToolChain().getArch()) {
+  switch (getToolChain().getArch()) {
   default:
     break;
 
@@ -5349,8 +5325,7 @@ void GnuTool::anchor() {}
 
 void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
                                const InputInfo &Output,
-                               const InputInfoList &Inputs,
-                               const ArgList &Args,
+                               const InputInfoList &Inputs, const ArgList &Args,
                                const char *LinkingOutput) const {
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
@@ -5382,7 +5357,7 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
   if (getToolChain().getTriple().isOSDarwin()) {
     CmdArgs.push_back("-arch");
     CmdArgs.push_back(
-      Args.MakeArgString(getToolChain().getDefaultUniversalArchName()));
+        Args.MakeArgString(getToolChain().getDefaultUniversalArchName()));
   }
 
   // Try to force gcc to match the tool chain we want, if we recognize
@@ -5405,8 +5380,7 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fsyntax-only");
   }
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   // Only pass -x if gcc will understand it; otherwise hope gcc
   // understands the suffix correctly. The main use case this would go
@@ -5421,13 +5395,12 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
     if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
         II.getType() == types::TY_LLVM_BC || II.getType() == types::TY_LTO_BC)
       D.Diag(diag::err_drv_no_linker_llvm_support)
-        << getToolChain().getTripleString();
+          << getToolChain().getTripleString();
     else if (II.getType() == types::TY_AST)
-      D.Diag(diag::err_drv_no_ast_support)
-        << getToolChain().getTripleString();
+      D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString();
     else if (II.getType() == types::TY_ModuleFile)
       D.Diag(diag::err_drv_no_module_support)
-        << getToolChain().getTripleString();
+          << getToolChain().getTripleString();
 
     if (types::canTypeBeUserSpecified(II.getType())) {
       CmdArgs.push_back("-x");
@@ -5459,18 +5432,17 @@ void gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
   } else
     GCCName = "gcc";
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetProgramPath(GCCName));
+  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName));
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void gcc::Preprocess::RenderExtraToolArgs(const JobAction &JA,
-                                          ArgStringList &CmdArgs) const {
+void gcc::Preprocessor::RenderExtraToolArgs(const JobAction &JA,
+                                            ArgStringList &CmdArgs) const {
   CmdArgs.push_back("-E");
 }
 
-void gcc::Compile::RenderExtraToolArgs(const JobAction &JA,
-                                       ArgStringList &CmdArgs) const {
+void gcc::Compiler::RenderExtraToolArgs(const JobAction &JA,
+                                        ArgStringList &CmdArgs) const {
   const Driver &D = getToolChain().getDriver();
 
   switch (JA.getType()) {
@@ -5492,21 +5464,19 @@ void gcc::Compile::RenderExtraToolArgs(const JobAction &JA,
   }
 }
 
-void gcc::Link::RenderExtraToolArgs(const JobAction &JA,
-                                    ArgStringList &CmdArgs) const {
+void gcc::Linker::RenderExtraToolArgs(const JobAction &JA,
+                                      ArgStringList &CmdArgs) const {
   // The types are (hopefully) good enough.
 }
 
 // Hexagon tools start.
-void hexagon::Assemble::RenderExtraToolArgs(const JobAction &JA,
-                                        ArgStringList &CmdArgs) const {
-
-}
-void hexagon::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                               const InputInfo &Output,
-                               const InputInfoList &Inputs,
-                               const ArgList &Args,
-                               const char *LinkingOutput) const {
+void hexagon::Assembler::RenderExtraToolArgs(const JobAction &JA,
+                                             ArgStringList &CmdArgs) const {}
+void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                      const InputInfo &Output,
+                                      const InputInfoList &Inputs,
+                                      const ArgList &Args,
+                                      const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
 
   const Driver &D = getToolChain().getDriver();
@@ -5526,11 +5496,10 @@ void hexagon::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fsyntax-only");
   }
 
-  if (const char* v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args))
+  if (const char *v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args))
     CmdArgs.push_back(Args.MakeArgString(std::string("-G") + v));
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   // Only pass -x if gcc will understand it; otherwise hope gcc
   // understands the suffix correctly. The main use case this would go
@@ -5545,18 +5514,19 @@ void hexagon::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
     if (II.getType() == types::TY_LLVM_IR || II.getType() == types::TY_LTO_IR ||
         II.getType() == types::TY_LLVM_BC || II.getType() == types::TY_LTO_BC)
       D.Diag(clang::diag::err_drv_no_linker_llvm_support)
-        << getToolChain().getTripleString();
+          << getToolChain().getTripleString();
     else if (II.getType() == types::TY_AST)
       D.Diag(clang::diag::err_drv_no_ast_support)
-        << getToolChain().getTripleString();
+          << getToolChain().getTripleString();
     else if (II.getType() == types::TY_ModuleFile)
       D.Diag(diag::err_drv_no_module_support)
-      << getToolChain().getTripleString();
+          << getToolChain().getTripleString();
 
     if (II.isFilename())
       CmdArgs.push_back(II.getFilename());
     else
-      // Don't render as input, we need gcc to do the translations. FIXME: Pranav: What is this ?
+      // Don't render as input, we need gcc to do the translations.
+      // FIXME: Pranav: What is this ?
       II.getInputArg().render(Args, CmdArgs);
   }
 
@@ -5565,22 +5535,21 @@ void hexagon::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void hexagon::Link::RenderExtraToolArgs(const JobAction &JA,
-                                    ArgStringList &CmdArgs) const {
+void hexagon::Linker::RenderExtraToolArgs(const JobAction &JA,
+                                          ArgStringList &CmdArgs) const {
   // The types are (hopefully) good enough.
 }
 
 static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
-                              const toolchains::Hexagon_TC& ToolChain,
-                              const InputInfo &Output,
-                              const InputInfoList &Inputs,
-                              const ArgList &Args,
-                              ArgStringList &CmdArgs,
-                              const char *LinkingOutput) {
+                                     const toolchains::Hexagon_TC &ToolChain,
+                                     const InputInfo &Output,
+                                     const InputInfoList &Inputs,
+                                     const ArgList &Args,
+                                     ArgStringList &CmdArgs,
+                                     const char *LinkingOutput) {
 
   const Driver &D = ToolChain.getDriver();
 
-
   //----------------------------------------------------------------------------
   //
   //----------------------------------------------------------------------------
@@ -5624,7 +5593,7 @@ static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
   if (buildPIE && !buildingLib)
     CmdArgs.push_back("-pie");
 
-  if (const char* v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args)) {
+  if (const char *v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args)) {
     CmdArgs.push_back(Args.MakeArgString(std::string("-G") + v));
     useG0 = toolchains::Hexagon_TC::UsesG0(v);
   }
@@ -5640,15 +5609,14 @@ static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
   const std::string MarchG0Suffix = MarchSuffix + G0Suffix;
   const std::string RootDir =
       toolchains::Hexagon_TC::GetGnuDir(D.InstalledDir, Args) + "/";
-  const std::string StartFilesDir = RootDir
-                                    + "hexagon/lib"
-                                    + (useG0 ? MarchG0Suffix : MarchSuffix);
+  const std::string StartFilesDir =
+      RootDir + "hexagon/lib" + (useG0 ? MarchG0Suffix : MarchSuffix);
 
   //----------------------------------------------------------------------------
   // moslib
   //----------------------------------------------------------------------------
   std::vector<std::string> oslibs;
-  bool hasStandalone= false;
+  bool hasStandalone = false;
 
   for (const Arg *A : Args.filtered(options::OPT_moslib_EQ)) {
     A->claim();
@@ -5668,7 +5636,7 @@ static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
     if (!buildingLib) {
       if (hasStandalone) {
         CmdArgs.push_back(
-          Args.MakeArgString(StartFilesDir + "/crt0_standalone.o"));
+            Args.MakeArgString(StartFilesDir + "/crt0_standalone.o"));
       }
       CmdArgs.push_back(Args.MakeArgString(StartFilesDir + "/crt0.o"));
     }
@@ -5706,9 +5674,8 @@ static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("--start-group");
 
     if (!buildingLib) {
-      for(std::vector<std::string>::iterator i = oslibs.begin(),
-            e = oslibs.end(); i != e; ++i)
-        CmdArgs.push_back(Args.MakeArgString("-l" + *i));
+      for (const std::string &Lib : oslibs)
+        CmdArgs.push_back(Args.MakeArgString("-l" + Lib));
       CmdArgs.push_back("-lc");
     }
     CmdArgs.push_back("-lgcc");
@@ -5725,14 +5692,14 @@ static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
   }
 }
 
-void hexagon::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                               const InputInfo &Output,
-                               const InputInfoList &Inputs,
-                               const ArgList &Args,
-                               const char *LinkingOutput) const {
+void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                   const InputInfo &Output,
+                                   const InputInfoList &Inputs,
+                                   const ArgList &Args,
+                                   const char *LinkingOutput) const {
 
-  const toolchains::Hexagon_TC& ToolChain =
-    static_cast<const toolchains::Hexagon_TC&>(getToolChain());
+  const toolchains::Hexagon_TC &ToolChain =
+      static_cast<const toolchains::Hexagon_TC &>(getToolChain());
 
   ArgStringList CmdArgs;
   constructHexagonLinkArgs(C, JA, ToolChain, Output, Inputs, Args, CmdArgs,
@@ -5764,7 +5731,7 @@ const std::string arm::getARMArch(const ArgList &Args,
       const char *Suffix = arm::getLLVMArchSuffixForARM(CPU, MArch);
       // If there is no valid architecture suffix for this CPU we don't know how
       // to handle it, so return no architecture.
-      if (strcmp(Suffix,"") == 0)
+      if (strcmp(Suffix, "") == 0)
         MArch = "";
       else
         MArch = std::string("arm") + Suffix;
@@ -5793,7 +5760,7 @@ const char *arm::getARMCPUForMArch(const ArgList &Args,
 
 /// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting.
 std::string arm::getARMTargetCPU(const ArgList &Args,
-                               const llvm::Triple &Triple) {
+                                 const llvm::Triple &Triple) {
   // FIXME: Warn on inconsistent use of -mcpu and -march.
   // If we have -mcpu=, use that.
   if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
@@ -5822,7 +5789,7 @@ const char *arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch) {
   return llvm::ARMTargetParser::getSubArch(ArchKind);
 }
 
-void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs, 
+void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs,
                             const llvm::Triple &Triple) {
   if (Args.hasArg(options::OPT_r))
     return;
@@ -5866,14 +5833,14 @@ bool mips::isUCLibc(const ArgList &Args) {
 bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
   if (Arg *NaNArg = Args.getLastArg(options::OPT_mnan_EQ))
     return llvm::StringSwitch<bool>(NaNArg->getValue())
-               .Case("2008", true)
-               .Case("legacy", false)
-               .Default(false);
+        .Case("2008", true)
+        .Case("legacy", false)
+        .Default(false);
 
   // NaN2008 is the default for MIPS32r6/MIPS64r6.
   return llvm::StringSwitch<bool>(getCPUName(Args, Triple))
-             .Cases("mips32r6", "mips64r6", true)
-             .Default(false);
+      .Cases("mips32r6", "mips64r6", true)
+      .Default(false);
 
   return false;
 }
@@ -5893,10 +5860,10 @@ bool mips::isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
     return false;
 
   return llvm::StringSwitch<bool>(CPUName)
-             .Cases("mips2", "mips3", "mips4", "mips5", true)
-             .Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true)
-             .Cases("mips64", "mips64r2", "mips64r3", "mips64r5", true)
-             .Default(false);
+      .Cases("mips2", "mips3", "mips4", "mips5", true)
+      .Cases("mips32", "mips32r2", "mips32r3", "mips32r5", true)
+      .Cases("mips64", "mips64r2", "mips64r3", "mips64r5", true)
+      .Default(false);
 }
 
 bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple,
@@ -5927,25 +5894,25 @@ llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
   // translation.
 
   return llvm::StringSwitch<llvm::Triple::ArchType>(Str)
-    .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
-    .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
-    .Case("ppc64", llvm::Triple::ppc64)
-    .Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
-    .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
-           llvm::Triple::x86)
-    .Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
-    // This is derived from the driver driver.
-    .Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
-    .Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
-    .Cases("armv7s", "xscale", llvm::Triple::arm)
-    .Case("arm64", llvm::Triple::aarch64)
-    .Case("r600", llvm::Triple::r600)
-    .Case("amdgcn", llvm::Triple::amdgcn)
-    .Case("nvptx", llvm::Triple::nvptx)
-    .Case("nvptx64", llvm::Triple::nvptx64)
-    .Case("amdil", llvm::Triple::amdil)
-    .Case("spir", llvm::Triple::spir)
-    .Default(llvm::Triple::UnknownArch);
+      .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", llvm::Triple::ppc)
+      .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", llvm::Triple::ppc)
+      .Case("ppc64", llvm::Triple::ppc64)
+      .Cases("i386", "i486", "i486SX", "i586", "i686", llvm::Triple::x86)
+      .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
+             llvm::Triple::x86)
+      .Cases("x86_64", "x86_64h", llvm::Triple::x86_64)
+      // This is derived from the driver driver.
+      .Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
+      .Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
+      .Cases("armv7s", "xscale", llvm::Triple::arm)
+      .Case("arm64", llvm::Triple::aarch64)
+      .Case("r600", llvm::Triple::r600)
+      .Case("amdgcn", llvm::Triple::amdgcn)
+      .Case("nvptx", llvm::Triple::nvptx)
+      .Case("nvptx64", llvm::Triple::nvptx64)
+      .Case("amdil", llvm::Triple::amdil)
+      .Case("spir", llvm::Triple::spir)
+      .Default(llvm::Triple::UnknownArch);
 }
 
 void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
@@ -5989,11 +5956,11 @@ const char *Clang::getDependencyFileName(const ArgList &Args,
   return Args.MakeArgString(Res + ".d");
 }
 
-void cloudabi::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                  const InputInfo &Output,
-                                  const InputInfoList &Inputs,
-                                  const ArgList &Args,
-                                  const char *LinkingOutput) const {
+void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                    const InputInfo &Output,
+                                    const InputInfoList &Inputs,
+                                    const ArgList &Args,
+                                    const char *LinkingOutput) const {
   const ToolChain &ToolChain = getToolChain();
   const Driver &D = ToolChain.getDriver();
   ArgStringList CmdArgs;
@@ -6059,11 +6026,11 @@ void cloudabi::Link::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void darwin::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                    const InputInfo &Output,
-                                    const InputInfoList &Inputs,
-                                    const ArgList &Args,
-                                    const char *LinkingOutput) const {
+void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                     const InputInfo &Output,
+                                     const InputInfoList &Inputs,
+                                     const ArgList &Args,
+                                     const char *LinkingOutput) const {
   ArgStringList CmdArgs;
 
   assert(Inputs.size() == 1 && "Unexpected number of inputs.");
@@ -6112,8 +6079,7 @@ void darwin::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
        Args.hasArg(options::OPT_static)))
     CmdArgs.push_back("-static");
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   assert(Output.isFilename() && "Unexpected lipo output.");
   CmdArgs.push_back("-o");
@@ -6124,8 +6090,7 @@ void darwin::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
 
   // asm_final spec is empty.
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetProgramPath("as"));
+  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
@@ -6144,7 +6109,7 @@ void darwin::MachOTool::AddMachOArch(const ArgList &Args,
     CmdArgs.push_back("-force_cpusubtype_ALL");
 }
 
-bool darwin::Link::NeedsTempPath(const InputInfoList &Inputs) const {
+bool darwin::Linker::NeedsTempPath(const InputInfoList &Inputs) const {
   // We only need to generate a temp path for LTO if we aren't compiling object
   // files. When compiling source files, we run 'dsymutil' after linking. We
   // don't run 'dsymutil' when compiling object files.
@@ -6155,21 +6120,19 @@ bool darwin::Link::NeedsTempPath(const InputInfoList &Inputs) const {
   return false;
 }
 
-void darwin::Link::AddLinkArgs(Compilation &C,
-                               const ArgList &Args,
-                               ArgStringList &CmdArgs,
-                               const InputInfoList &Inputs) const {
+void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args,
+                                 ArgStringList &CmdArgs,
+                                 const InputInfoList &Inputs) const {
   const Driver &D = getToolChain().getDriver();
   const toolchains::MachO &MachOTC = getMachOToolChain();
 
-  unsigned Version[3] = { 0, 0, 0 };
+  unsigned Version[3] = {0, 0, 0};
   if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
     bool HadExtra;
-    if (!Driver::GetReleaseVersion(A->getValue(), Version[0],
-                                   Version[1], Version[2], HadExtra) ||
+    if (!Driver::GetReleaseVersion(A->getValue(), Version[0], Version[1],
+                                   Version[2], HadExtra) ||
         HadExtra)
-      D.Diag(diag::err_drv_invalid_version_number)
-        << A->getAsString(Args);
+      D.Diag(diag::err_drv_invalid_version_number) << A->getAsString(Args);
   }
 
   // Newer linkers support -demangle. Pass it if supported and not disabled by
@@ -6191,7 +6154,7 @@ void darwin::Link::AddLinkArgs(Compilation &C,
   // dsymutil step.
   if (Version[0] >= 116 && D.IsUsingLTO(Args) && NeedsTempPath(Inputs)) {
     const char *TmpPath = C.getArgs().MakeArgString(
-      D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
+        D.GetTemporaryPath("cc", types::getTypeTempSuffix(types::TY_Object)));
     C.addTempFile(TmpPath);
     CmdArgs.push_back("-object_path_lto");
     CmdArgs.push_back(TmpPath);
@@ -6219,8 +6182,8 @@ void darwin::Link::AddLinkArgs(Compilation &C,
     if ((A = Args.getLastArg(options::OPT_compatibility__version)) ||
         (A = Args.getLastArg(options::OPT_current__version)) ||
         (A = Args.getLastArg(options::OPT_install__name)))
-      D.Diag(diag::err_drv_argument_only_allowed_with)
-        << A->getAsString(Args) << "-dynamiclib";
+      D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
+                                                       << "-dynamiclib";
 
     Args.AddLastArg(CmdArgs, options::OPT_force__flat__namespace);
     Args.AddLastArg(CmdArgs, options::OPT_keep__private__externs);
@@ -6235,8 +6198,8 @@ void darwin::Link::AddLinkArgs(Compilation &C,
         (A = Args.getLastArg(options::OPT_force__flat__namespace)) ||
         (A = Args.getLastArg(options::OPT_keep__private__externs)) ||
         (A = Args.getLastArg(options::OPT_private__bundle)))
-      D.Diag(diag::err_drv_argument_not_allowed_with)
-        << A->getAsString(Args) << "-dynamiclib";
+      D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
+                                                      << "-dynamiclib";
 
     Args.AddAllArgsTranslated(CmdArgs, options::OPT_compatibility__version,
                               "-dylib_compatibility_version");
@@ -6274,9 +6237,9 @@ void darwin::Link::AddLinkArgs(Compilation &C,
   Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined);
   Args.AddAllArgs(CmdArgs, options::OPT_multiply__defined__unused);
 
-  if (const Arg *A = Args.getLastArg(options::OPT_fpie, options::OPT_fPIE,
-                                     options::OPT_fno_pie,
-                                     options::OPT_fno_PIE)) {
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_fpie, options::OPT_fPIE,
+                          options::OPT_fno_pie, options::OPT_fno_PIE)) {
     if (A->getOption().matches(options::OPT_fpie) ||
         A->getOption().matches(options::OPT_fPIE))
       CmdArgs.push_back("-pie");
@@ -6335,11 +6298,11 @@ void darwin::Link::AddLinkArgs(Compilation &C,
   Args.AddLastArg(CmdArgs, options::OPT_Mach);
 }
 
-void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                const InputInfo &Output,
-                                const InputInfoList &Inputs,
-                                const ArgList &Args,
-                                const char *LinkingOutput) const {
+void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const ArgList &Args,
+                                  const char *LinkingOutput) const {
   assert(Output.getType() == types::TY_Image && "Invalid linker output type.");
 
   // If the number of arguments surpasses the system limits, we will encode the
@@ -6359,7 +6322,7 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA,
     for (const auto &Arg : Args)
       Arg->claim();
     const char *Exec =
-      Args.MakeArgString(getToolChain().GetProgramPath("touch"));
+        Args.MakeArgString(getToolChain().GetProgramPath("touch"));
     CmdArgs.push_back(Output.getFilename());
     C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
     return;
@@ -6437,8 +6400,7 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA,
     InputFileList.push_back(II.getFilename());
   }
 
-  if (isObjCRuntimeLinked(Args) &&
-      !Args.hasArg(options::OPT_nostdlib) &&
+  if (isObjCRuntimeLinked(Args) && !Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nodefaultlibs)) {
     // We use arclite library for both ARC and subscripting support.
     getMachOToolChain().AddLinkARCArgs(Args, CmdArgs);
@@ -6496,10 +6458,9 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetLinkerPath());
+  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
   std::unique_ptr<Command> Cmd =
-    llvm::make_unique<Command>(JA, *this, Exec, CmdArgs);
+      llvm::make_unique<Command>(JA, *this, Exec, CmdArgs);
   Cmd->setInputFileList(std::move(InputFileList));
   C.addCommand(std::move(Cmd));
 }
@@ -6542,7 +6503,7 @@ void darwin::Dsymutil::ConstructJob(Compilation &C, const JobAction &JA,
   CmdArgs.push_back(Input.getFilename());
 
   const char *Exec =
-    Args.MakeArgString(getToolChain().GetProgramPath("dsymutil"));
+      Args.MakeArgString(getToolChain().GetProgramPath("dsymutil"));
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
@@ -6565,11 +6526,11 @@ void darwin::VerifyDebug::ConstructJob(Compilation &C, const JobAction &JA,
   CmdArgs.push_back(Input.getFilename());
 
   const char *Exec =
-    Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump"));
+      Args.MakeArgString(getToolChain().GetProgramPath("dwarfdump"));
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void solaris::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
+void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                       const InputInfo &Output,
                                       const InputInfoList &Inputs,
                                       const ArgList &Args,
@@ -6577,8 +6538,7 @@ void solaris::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   claimNoWarnArgs(Args);
   ArgStringList CmdArgs;
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
@@ -6590,11 +6550,11 @@ void solaris::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void solaris::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                  const InputInfo &Output,
-                                  const InputInfoList &Inputs,
-                                  const ArgList &Args,
-                                  const char *LinkingOutput) const {
+void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                   const InputInfo &Output,
+                                   const InputInfoList &Inputs,
+                                   const ArgList &Args,
+                                   const char *LinkingOutput) const {
   // FIXME: Find a real GCC, don't hard-code versions here
   std::string GCCLibPath = "/usr/gcc/4.5/lib/gcc/";
   const llvm::Triple &T = getToolChain().getTriple();
@@ -6690,16 +6650,15 @@ void solaris::Link::ConstructJob(Compilation &C, const JobAction &JA,
 
   addProfileRT(getToolChain(), Args, CmdArgs);
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetLinkerPath());
+  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void openbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                     const InputInfo &Output,
-                                     const InputInfoList &Inputs,
-                                     const ArgList &Args,
-                                     const char *LinkingOutput) const {
+void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                      const InputInfo &Output,
+                                      const InputInfoList &Inputs,
+                                      const ArgList &Args,
+                                      const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
   ArgStringList CmdArgs;
   bool NeedsKPIC = false;
@@ -6753,8 +6712,7 @@ void openbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   if (NeedsKPIC)
     addAssemblerKPIC(Args, CmdArgs);
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
@@ -6762,16 +6720,15 @@ void openbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   for (const auto &II : Inputs)
     CmdArgs.push_back(II.getFilename());
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetProgramPath("as"));
+  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void openbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                 const InputInfo &Output,
-                                 const InputInfoList &Inputs,
-                                 const ArgList &Args,
-                                 const char *LinkingOutput) const {
+void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                   const InputInfo &Output,
+                                   const InputInfoList &Inputs,
+                                   const ArgList &Args,
+                                   const char *LinkingOutput) const {
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
 
@@ -6822,25 +6779,25 @@ void openbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!Args.hasArg(options::OPT_shared)) {
-      if (Args.hasArg(options::OPT_pg))  
-        CmdArgs.push_back(Args.MakeArgString(
-                                getToolChain().GetFilePath("gcrt0.o")));
+      if (Args.hasArg(options::OPT_pg))
+        CmdArgs.push_back(
+            Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o")));
       else
-        CmdArgs.push_back(Args.MakeArgString(
-                                getToolChain().GetFilePath("crt0.o")));
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbegin.o")));
+        CmdArgs.push_back(
+            Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
     } else {
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbeginS.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
     }
   }
 
   std::string Triple = getToolChain().getTripleString();
   if (Triple.substr(0, 6) == "x86_64")
     Triple.replace(0, 6, "amd64");
-  CmdArgs.push_back(Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple +
-                                       "/4.2.1"));
+  CmdArgs.push_back(
+      Args.MakeArgString("-L/usr/lib/gcc-lib/" + Triple + "/4.2.1"));
 
   Args.AddAllArgs(CmdArgs, options::OPT_L);
   Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
@@ -6856,7 +6813,7 @@ void openbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
       !Args.hasArg(options::OPT_nodefaultlibs)) {
     if (D.CCCIsCXX()) {
       getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
-      if (Args.hasArg(options::OPT_pg)) 
+      if (Args.hasArg(options::OPT_pg))
         CmdArgs.push_back("-lm_p");
       else
         CmdArgs.push_back("-lm");
@@ -6867,18 +6824,17 @@ void openbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-lgcc");
 
     if (Args.hasArg(options::OPT_pthread)) {
-      if (!Args.hasArg(options::OPT_shared) &&
-          Args.hasArg(options::OPT_pg))
-         CmdArgs.push_back("-lpthread_p");
+      if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg))
+        CmdArgs.push_back("-lpthread_p");
       else
-         CmdArgs.push_back("-lpthread");
+        CmdArgs.push_back("-lpthread");
     }
 
     if (!Args.hasArg(options::OPT_shared)) {
       if (Args.hasArg(options::OPT_pg))
-         CmdArgs.push_back("-lc_p");
+        CmdArgs.push_back("-lc_p");
       else
-         CmdArgs.push_back("-lc");
+        CmdArgs.push_back("-lc");
     }
 
     CmdArgs.push_back("-lgcc");
@@ -6887,28 +6843,26 @@ void openbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!Args.hasArg(options::OPT_shared))
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtend.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
     else
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtendS.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
   }
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetLinkerPath());
+  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void bitrig::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                    const InputInfo &Output,
-                                    const InputInfoList &Inputs,
-                                    const ArgList &Args,
-                                    const char *LinkingOutput) const {
+void bitrig::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                     const InputInfo &Output,
+                                     const InputInfoList &Inputs,
+                                     const ArgList &Args,
+                                     const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
   ArgStringList CmdArgs;
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
@@ -6920,11 +6874,11 @@ void bitrig::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void bitrig::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                const InputInfo &Output,
-                                const InputInfoList &Inputs,
-                                const ArgList &Args,
-                                const char *LinkingOutput) const {
+void bitrig::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const ArgList &Args,
+                                  const char *LinkingOutput) const {
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
 
@@ -6960,16 +6914,16 @@ void bitrig::Link::ConstructJob(Compilation &C, const JobAction &JA,
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!Args.hasArg(options::OPT_shared)) {
       if (Args.hasArg(options::OPT_pg))
-        CmdArgs.push_back(Args.MakeArgString(
-                                getToolChain().GetFilePath("gcrt0.o")));
+        CmdArgs.push_back(
+            Args.MakeArgString(getToolChain().GetFilePath("gcrt0.o")));
       else
-        CmdArgs.push_back(Args.MakeArgString(
-                                getToolChain().GetFilePath("crt0.o")));
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbegin.o")));
+        CmdArgs.push_back(
+            Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
     } else {
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbeginS.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
     }
   }
 
@@ -6990,8 +6944,7 @@ void bitrig::Link::ConstructJob(Compilation &C, const JobAction &JA,
     }
 
     if (Args.hasArg(options::OPT_pthread)) {
-      if (!Args.hasArg(options::OPT_shared) &&
-          Args.hasArg(options::OPT_pg))
+      if (!Args.hasArg(options::OPT_shared) && Args.hasArg(options::OPT_pg))
         CmdArgs.push_back("-lpthread_p");
       else
         CmdArgs.push_back("-lpthread");
@@ -7024,23 +6977,22 @@ void bitrig::Link::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!Args.hasArg(options::OPT_shared))
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtend.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
     else
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtendS.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
   }
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetLinkerPath());
+  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void freebsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                     const InputInfo &Output,
-                                     const InputInfoList &Inputs,
-                                     const ArgList &Args,
-                                     const char *LinkingOutput) const {
+void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                      const InputInfo &Output,
+                                      const InputInfoList &Inputs,
+                                      const ArgList &Args,
+                                      const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
   ArgStringList CmdArgs;
 
@@ -7085,7 +7037,7 @@ void freebsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("-mfpu=softvfp");
     }
 
-    switch(getToolChain().getTriple().getEnvironment()) {
+    switch (getToolChain().getTriple().getEnvironment()) {
     case llvm::Triple::GNUEABIHF:
     case llvm::Triple::GNUEABI:
     case llvm::Triple::EABI:
@@ -7106,8 +7058,7 @@ void freebsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
     addAssemblerKPIC(Args, CmdArgs);
   }
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
@@ -7119,11 +7070,11 @@ void freebsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void freebsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                 const InputInfo &Output,
-                                 const InputInfoList &Inputs,
-                                 const ArgList &Args,
-                                 const char *LinkingOutput) const {
+void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                   const InputInfo &Output,
+                                   const InputInfoList &Inputs,
+                                   const ArgList &Args,
+                                   const char *LinkingOutput) const {
   const toolchains::FreeBSD &ToolChain =
       static_cast<const toolchains::FreeBSD &>(getToolChain());
   const Driver &D = ToolChain.getDriver();
@@ -7298,12 +7249,11 @@ void freebsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
 
   addProfileRT(ToolChain, Args, CmdArgs);
 
-  const char *Exec =
-    Args.MakeArgString(getToolChain().GetLinkerPath());
+  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void netbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
+void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                      const InputInfo &Output,
                                      const InputInfoList &Inputs,
                                      const ArgList &Args,
@@ -7363,11 +7313,10 @@ void netbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
     break;
 
   default:
-    break;  
+    break;
   }
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
@@ -7379,11 +7328,11 @@ void netbsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                 const InputInfo &Output,
-                                 const InputInfoList &Inputs,
-                                 const ArgList &Args,
-                                 const char *LinkingOutput) const {
+void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const ArgList &Args,
+                                  const char *LinkingOutput) const {
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
 
@@ -7430,7 +7379,8 @@ void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
     break;
   case llvm::Triple::armeb:
   case llvm::Triple::thumbeb:
-    arm::appendEBLinkFlags(Args, CmdArgs,
+    arm::appendEBLinkFlags(
+        Args, CmdArgs,
         llvm::Triple(getToolChain().ComputeEffectiveClangTriple(Args)));
     CmdArgs.push_back("-m");
     switch (getToolChain().getTriple().getEnvironment()) {
@@ -7455,14 +7405,14 @@ void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
         CmdArgs.push_back("elf32btsmip");
       else
         CmdArgs.push_back("elf32ltsmip");
-   } else if (mips::hasMipsAbiArg(Args, "64")) {
-     CmdArgs.push_back("-m");
-     if (getToolChain().getArch() == llvm::Triple::mips64)
-       CmdArgs.push_back("elf64btsmip");
-     else
-       CmdArgs.push_back("elf64ltsmip");
-   }
-   break;
+    } else if (mips::hasMipsAbiArg(Args, "64")) {
+      CmdArgs.push_back("-m");
+      if (getToolChain().getArch() == llvm::Triple::mips64)
+        CmdArgs.push_back("elf64btsmip");
+      else
+        CmdArgs.push_back("elf64ltsmip");
+    }
+    break;
   case llvm::Triple::ppc:
     CmdArgs.push_back("-m");
     CmdArgs.push_back("elf32ppc_nbsd");
@@ -7498,17 +7448,17 @@ void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!Args.hasArg(options::OPT_shared)) {
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crt0.o")));
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crti.o")));
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbegin.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crt0.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
     } else {
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crti.o")));
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbeginS.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
     }
   }
 
@@ -7526,7 +7476,7 @@ void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
   getToolChain().getTriple().getOSVersion(Major, Minor, Micro);
   bool useLibgcc = true;
   if (Major >= 7 || (Major == 6 && Minor == 99 && Micro >= 49) || Major == 0) {
-    switch(getToolChain().getArch()) {
+    switch (getToolChain().getArch()) {
     case llvm::Triple::aarch64:
     case llvm::Triple::arm:
     case llvm::Triple::armeb:
@@ -7574,13 +7524,12 @@ void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!Args.hasArg(options::OPT_shared))
-      CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(
-                                                                  "crtend.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
     else
-      CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(
-                                                                 "crtendS.o")));
-    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(
-                                                                    "crtn.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
+    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
   }
 
   addProfileRT(getToolChain(), Args, CmdArgs);
@@ -7589,11 +7538,11 @@ void netbsd::Link::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void gnutools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                      const InputInfo &Output,
-                                      const InputInfoList &Inputs,
-                                      const ArgList &Args,
-                                      const char *LinkingOutput) const {
+void gnutools::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                       const InputInfo &Output,
+                                       const InputInfoList &Inputs,
+                                       const ArgList &Args,
+                                       const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
 
   ArgStringList CmdArgs;
@@ -7668,8 +7617,8 @@ void gnutools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
     // march from being picked in the absence of a cpu flag.
     Arg *A;
     if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) &&
-      StringRef(A->getValue()).lower() == "krait")
-        CmdArgs.push_back("-march=armv7-a");
+        StringRef(A->getValue()).lower() == "krait")
+      CmdArgs.push_back("-march=armv7-a");
     else
       Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
     Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
@@ -7733,8 +7682,8 @@ void gnutools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
 
     // Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of
     // -mno-mips16 is actually -no-mips16.
-    if (Arg *A = Args.getLastArg(options::OPT_mips16,
-                                 options::OPT_mno_mips16)) {
+    if (Arg *A =
+            Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) {
       if (A->getOption().matches(options::OPT_mips16)) {
         A->claim();
         A->render(Args, CmdArgs);
@@ -7780,8 +7729,7 @@ void gnutools::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   if (NeedsKPIC)
     addAssemblerKPIC(Args, CmdArgs);
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
@@ -7909,7 +7857,8 @@ static void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
   switch (RLT) {
   case ToolChain::RLT_CompilerRT:
     switch (TC.getTriple().getOS()) {
-    default: llvm_unreachable("unsupported OS");
+    default:
+      llvm_unreachable("unsupported OS");
     case llvm::Triple::Win32:
     case llvm::Triple::Linux:
       addClangRT(TC, Args, CmdArgs);
@@ -7970,11 +7919,11 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
   }
 }
 
-void gnutools::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                  const InputInfo &Output,
-                                  const InputInfoList &Inputs,
-                                  const ArgList &Args,
-                                  const char *LinkingOutput) const {
+void gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                    const InputInfo &Output,
+                                    const InputInfoList &Inputs,
+                                    const ArgList &Args,
+                                    const char *LinkingOutput) const {
   const toolchains::Linux &ToolChain =
       static_cast<const toolchains::Linux &>(getToolChain());
   const Driver &D = ToolChain.getDriver();
@@ -8048,7 +7997,7 @@ void gnutools::Link::ConstructJob(Compilation &C, const JobAction &JA,
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!isAndroid) {
       const char *crt1 = nullptr;
-      if (!Args.hasArg(options::OPT_shared)){
+      if (!Args.hasArg(options::OPT_shared)) {
         if (Args.hasArg(options::OPT_pg))
           crt1 = "gcrt1.o";
         else if (IsPIE)
@@ -8096,11 +8045,10 @@ void gnutools::Link::ConstructJob(Compilation &C, const JobAction &JA,
   // The profile runtime also needs access to system libraries.
   addProfileRT(getToolChain(), Args, CmdArgs);
 
-  if (D.CCCIsCXX() &&
-      !Args.hasArg(options::OPT_nostdlib) &&
+  if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nodefaultlibs)) {
     bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
-      !Args.hasArg(options::OPT_static);
+                               !Args.hasArg(options::OPT_static);
     if (OnlyLibstdcxxStatic)
       CmdArgs.push_back("-Bstatic");
     ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
@@ -8181,37 +8129,35 @@ void gnutools::Link::ConstructJob(Compilation &C, const JobAction &JA,
       llvm::make_unique<Command>(JA, *this, ToolChain.Linker.c_str(), CmdArgs));
 }
 
-
 // NaCl ARM assembly (inline or standalone) can be written with a set of macros
 // for the various SFI requirements like register masking. The assembly tool
 // inserts the file containing the macros as an input into all the assembly
 // jobs.
-void nacltools::AssembleARM::ConstructJob(Compilation &C, const JobAction &JA,
-                                          const InputInfo &Output,
-                                          const InputInfoList &Inputs,
-                                          const ArgList &Args,
-                                          const char *LinkingOutput) const {
-  const toolchains::NaCl_TC& ToolChain =
-    static_cast<const toolchains::NaCl_TC&>(getToolChain());
+void nacltools::AssemblerARM::ConstructJob(Compilation &C, const JobAction &JA,
+                                           const InputInfo &Output,
+                                           const InputInfoList &Inputs,
+                                           const ArgList &Args,
+                                           const char *LinkingOutput) const {
+  const toolchains::NaCl_TC &ToolChain =
+      static_cast<const toolchains::NaCl_TC &>(getToolChain());
   InputInfo NaClMacros(ToolChain.GetNaClArmMacrosPath(), types::TY_PP_Asm,
                        "nacl-arm-macros.s");
   InputInfoList NewInputs;
   NewInputs.push_back(NaClMacros);
   NewInputs.append(Inputs.begin(), Inputs.end());
-  gnutools::Assemble::ConstructJob(C, JA, Output, NewInputs, Args,
-                                   LinkingOutput);
+  gnutools::Assembler::ConstructJob(C, JA, Output, NewInputs, Args,
+                                    LinkingOutput);
 }
 
-
-// This is quite similar to gnutools::link::ConstructJob with changes that
+// This is quite similar to gnutools::Linker::ConstructJob with changes that
 // we use static by default, do not yet support sanitizers or LTO, and a few
 // others. Eventually we can support more of that and hopefully migrate back
-// to gnutools::link.
-void nacltools::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                   const InputInfo &Output,
-                                   const InputInfoList &Inputs,
-                                   const ArgList &Args,
-                                   const char *LinkingOutput) const {
+// to gnutools::Linker.
+void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                     const InputInfo &Output,
+                                     const InputInfoList &Inputs,
+                                     const ArgList &Args,
+                                     const char *LinkingOutput) const {
 
   const toolchains::NaCl_TC &ToolChain =
       static_cast<const toolchains::NaCl_TC &>(getToolChain());
@@ -8293,11 +8239,10 @@ void nacltools::Link::ConstructJob(Compilation &C, const JobAction &JA,
 
   AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs);
 
-  if (D.CCCIsCXX() &&
-      !Args.hasArg(options::OPT_nostdlib) &&
+  if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nodefaultlibs)) {
-    bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
-      !IsStatic;
+    bool OnlyLibstdcxxStatic =
+        Args.hasArg(options::OPT_static_libstdcxx) && !IsStatic;
     if (OnlyLibstdcxxStatic)
       CmdArgs.push_back("-Bstatic");
     ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
@@ -8314,8 +8259,7 @@ void nacltools::Link::ConstructJob(Compilation &C, const JobAction &JA,
       // NaCl's libc++ currently requires libpthread, so just always include it
       // in the group for C++.
       if (Args.hasArg(options::OPT_pthread) ||
-          Args.hasArg(options::OPT_pthreads) ||
-          D.CCCIsCXX()) {
+          Args.hasArg(options::OPT_pthreads) || D.CCCIsCXX()) {
         CmdArgs.push_back("-lpthread");
       }
 
@@ -8341,16 +8285,15 @@ void nacltools::Link::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
-  C.addCommand(llvm::make_unique<Command>(JA, *this,
-                                          ToolChain.Linker.c_str(), CmdArgs));
+  C.addCommand(
+      llvm::make_unique<Command>(JA, *this, ToolChain.Linker.c_str(), CmdArgs));
 }
 
-
-void minix::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                   const InputInfo &Output,
-                                   const InputInfoList &Inputs,
-                                   const ArgList &Args,
-                                   const char *LinkingOutput) const {
+void minix::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                    const InputInfo &Output,
+                                    const InputInfoList &Inputs,
+                                    const ArgList &Args,
+                                    const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
   ArgStringList CmdArgs;
 
@@ -8366,11 +8309,11 @@ void minix::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void minix::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                               const InputInfo &Output,
-                               const InputInfoList &Inputs,
-                               const ArgList &Args,
-                               const char *LinkingOutput) const {
+void minix::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                 const InputInfo &Output,
+                                 const InputInfoList &Inputs,
+                                 const ArgList &Args,
+                                 const char *LinkingOutput) const {
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
 
@@ -8383,10 +8326,11 @@ void minix::Link::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles)) {
-      CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
-      CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
-      CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
-      CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
+    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
+    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
+    CmdArgs.push_back(
+        Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
+    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
   }
 
   Args.AddAllArgs(CmdArgs, options::OPT_L);
@@ -8413,7 +8357,7 @@ void minix::Link::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-lCompilerRT-Generic");
     CmdArgs.push_back("-L/usr/pkg/compiler-rt/lib");
     CmdArgs.push_back(
-         Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
+        Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
   }
 
   const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
@@ -8424,11 +8368,11 @@ void minix::Link::ConstructJob(Compilation &C, const JobAction &JA,
 
 // For now, DragonFly Assemble does just about the same as for
 // FreeBSD, but this may change soon.
-void dragonfly::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                       const InputInfo &Output,
-                                       const InputInfoList &Inputs,
-                                       const ArgList &Args,
-                                       const char *LinkingOutput) const {
+void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                        const InputInfo &Output,
+                                        const InputInfoList &Inputs,
+                                        const ArgList &Args,
+                                        const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
   ArgStringList CmdArgs;
 
@@ -8449,11 +8393,11 @@ void dragonfly::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void dragonfly::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                   const InputInfo &Output,
-                                   const InputInfoList &Inputs,
-                                   const ArgList &Args,
-                                   const char *LinkingOutput) const {
+void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                     const InputInfo &Output,
+                                     const InputInfoList &Inputs,
+                                     const ArgList &Args,
+                                     const char *LinkingOutput) const {
   const Driver &D = getToolChain().getDriver();
   ArgStringList CmdArgs;
   bool UseGCC47 = llvm::sys::fs::exists("/usr/lib/gcc47");
@@ -8494,25 +8438,24 @@ void dragonfly::Link::ConstructJob(Compilation &C, const JobAction &JA,
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (!Args.hasArg(options::OPT_shared)) {
       if (Args.hasArg(options::OPT_pg))
-        CmdArgs.push_back(Args.MakeArgString(
-                                getToolChain().GetFilePath("gcrt1.o")));
+        CmdArgs.push_back(
+            Args.MakeArgString(getToolChain().GetFilePath("gcrt1.o")));
       else {
         if (Args.hasArg(options::OPT_pie))
-          CmdArgs.push_back(Args.MakeArgString(
-                                  getToolChain().GetFilePath("Scrt1.o")));
+          CmdArgs.push_back(
+              Args.MakeArgString(getToolChain().GetFilePath("Scrt1.o")));
         else
-          CmdArgs.push_back(Args.MakeArgString(
-                                  getToolChain().GetFilePath("crt1.o")));
+          CmdArgs.push_back(
+              Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
       }
     }
-    CmdArgs.push_back(Args.MakeArgString(
-                            getToolChain().GetFilePath("crti.o")));
+    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
     if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbeginS.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
     else
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtbegin.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
   }
 
   Args.AddAllArgs(CmdArgs, options::OPT_L);
@@ -8581,13 +8524,12 @@ void dragonfly::Link::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles)) {
     if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtendS.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
     else
-      CmdArgs.push_back(Args.MakeArgString(
-                              getToolChain().GetFilePath("crtend.o")));
-    CmdArgs.push_back(Args.MakeArgString(
-                            getToolChain().GetFilePath("crtn.o")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
+    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
   }
 
   addProfileRT(getToolChain(), Args, CmdArgs);
@@ -8616,18 +8558,18 @@ static std::string FindVisualStudioExecutable(const ToolChain &TC,
   return Exe;
 }
 
-void visualstudio::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                      const InputInfo &Output,
-                                      const InputInfoList &Inputs,
-                                      const ArgList &Args,
-                                      const char *LinkingOutput) const {
+void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                        const InputInfo &Output,
+                                        const InputInfoList &Inputs,
+                                        const ArgList &Args,
+                                        const char *LinkingOutput) const {
   ArgStringList CmdArgs;
   const ToolChain &TC = getToolChain();
 
   assert((Output.isFilename() || Output.isNothing()) && "invalid output");
   if (Output.isFilename())
-    CmdArgs.push_back(Args.MakeArgString(std::string("-out:") +
-                                         Output.getFilename()));
+    CmdArgs.push_back(
+        Args.MakeArgString(std::string("-out:") + Output.getFilename()));
 
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nostartfiles) && !C.getDriver().IsCLMode())
@@ -8671,16 +8613,14 @@ void visualstudio::Link::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.hasArg(options::OPT_g_Group))
     CmdArgs.push_back("-debug");
 
-  bool DLL = Args.hasArg(options::OPT__SLASH_LD,
-                         options::OPT__SLASH_LDd,
+  bool DLL = Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd,
                          options::OPT_shared);
   if (DLL) {
     CmdArgs.push_back(Args.MakeArgString("-dll"));
 
     SmallString<128> ImplibName(Output.getFilename());
     llvm::sys::path::replace_extension(ImplibName, "lib");
-    CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") +
-                                         ImplibName));
+    CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName));
   }
 
   if (TC.getSanitizerArgs().needsAsanRt()) {
@@ -8688,8 +8628,7 @@ void visualstudio::Link::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
     if (Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) {
       static const char *CompilerRTComponents[] = {
-        "asan_dynamic",
-        "asan_dynamic_runtime_thunk",
+          "asan_dynamic", "asan_dynamic_runtime_thunk",
       };
       for (const auto &Component : CompilerRTComponents)
         CmdArgs.push_back(Args.MakeArgString(getCompilerRT(TC, Component)));
@@ -8697,11 +8636,11 @@ void visualstudio::Link::ConstructJob(Compilation &C, const JobAction &JA,
       // to ensure proper SEH handling.
       CmdArgs.push_back(Args.MakeArgString("-include:___asan_seh_interceptor"));
     } else if (DLL) {
-      CmdArgs.push_back(Args.MakeArgString(getCompilerRT(TC, "asan_dll_thunk")));
+      CmdArgs.push_back(
+          Args.MakeArgString(getCompilerRT(TC, "asan_dll_thunk")));
     } else {
       static const char *CompilerRTComponents[] = {
-        "asan",
-        "asan_cxx",
+          "asan", "asan_cxx",
       };
       for (const auto &Component : CompilerRTComponents)
         CmdArgs.push_back(Args.MakeArgString(getCompilerRT(TC, Component)));
@@ -8760,21 +8699,21 @@ void visualstudio::Link::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void visualstudio::Compile::ConstructJob(Compilation &C, const JobAction &JA,
-                                         const InputInfo &Output,
-                                         const InputInfoList &Inputs,
-                                         const ArgList &Args,
-                                         const char *LinkingOutput) const {
+void visualstudio::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
+                                          const InputInfo &Output,
+                                          const InputInfoList &Inputs,
+                                          const ArgList &Args,
+                                          const char *LinkingOutput) const {
   C.addCommand(GetCommand(C, JA, Output, Inputs, Args, LinkingOutput));
 }
 
-std::unique_ptr<Command> visualstudio::Compile::GetCommand(
+std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
     Compilation &C, const JobAction &JA, const InputInfo &Output,
     const InputInfoList &Inputs, const ArgList &Args,
     const char *LinkingOutput) const {
   ArgStringList CmdArgs;
   CmdArgs.push_back("/nologo");
-  CmdArgs.push_back("/c"); // Compile only.
+  CmdArgs.push_back("/c");  // Compile only.
   CmdArgs.push_back("/W0"); // No warnings.
 
   // The goal is to be able to invoke this tool correctly based on
@@ -8832,7 +8771,6 @@ std::unique_ptr<Command> visualstudio::Compile::GetCommand(
                                options::OPT__SLASH_MT, options::OPT__SLASH_MTd))
     A->render(Args, CmdArgs);
 
-
   // Input filename.
   assert(Inputs.size() == 1);
   const InputInfo &II = Inputs[0];
@@ -8845,8 +8783,8 @@ std::unique_ptr<Command> visualstudio::Compile::GetCommand(
 
   // Output filename.
   assert(Output.getType() == types::TY_Object);
-  const char *Fo = Args.MakeArgString(std::string("/Fo") +
-                                      Output.getFilename());
+  const char *Fo =
+      Args.MakeArgString(std::string("/Fo") + Output.getFilename());
   CmdArgs.push_back(Fo);
 
   const Driver &D = getToolChain().getDriver();
@@ -8856,15 +8794,220 @@ std::unique_ptr<Command> visualstudio::Compile::GetCommand(
                                     CmdArgs);
 }
 
+/// MinGW Tools
+void MinGW::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                    const InputInfo &Output,
+                                    const InputInfoList &Inputs,
+                                    const ArgList &Args,
+                                    const char *LinkingOutput) const {
+  claimNoWarnArgs(Args);
+  ArgStringList CmdArgs;
+
+  if (getToolChain().getArch() == llvm::Triple::x86) {
+    CmdArgs.push_back("--32");
+  } else if (getToolChain().getArch() == llvm::Triple::x86_64) {
+    CmdArgs.push_back("--64");
+  }
+
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
+
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(Output.getFilename());
+
+  for (const auto &II : Inputs)
+    CmdArgs.push_back(II.getFilename());
+
+  const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
+  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
+
+  if (Args.hasArg(options::OPT_gsplit_dwarf))
+    SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
+                   SplitDebugName(Args, Inputs[0]));
+}
+
+void MinGW::Linker::AddLibGCC(const ArgList &Args,
+                              ArgStringList &CmdArgs) const {
+  if (Args.hasArg(options::OPT_mthreads))
+    CmdArgs.push_back("-lmingwthrd");
+  CmdArgs.push_back("-lmingw32");
+  if (Args.hasArg(options::OPT_shared) ||
+      Args.hasArg(options::OPT_shared_libgcc) ||
+      !Args.hasArg(options::OPT_static_libgcc)) {
+    CmdArgs.push_back("-lgcc_s");
+    CmdArgs.push_back("-lgcc");
+  } else {
+    CmdArgs.push_back("-lgcc");
+    CmdArgs.push_back("-lgcc_eh");
+  }
+  CmdArgs.push_back("-lmoldname");
+  CmdArgs.push_back("-lmingwex");
+  CmdArgs.push_back("-lmsvcrt");
+}
+
+void MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                 const InputInfo &Output,
+                                 const InputInfoList &Inputs,
+                                 const ArgList &Args,
+                                 const char *LinkingOutput) const {
+  const ToolChain &TC = getToolChain();
+  const Driver &D = TC.getDriver();
+  // const SanitizerArgs &Sanitize = TC.getSanitizerArgs();
+
+  ArgStringList CmdArgs;
+
+  // Silence warning for "clang -g foo.o -o foo"
+  Args.ClaimAllArgs(options::OPT_g_Group);
+  // and "clang -emit-llvm foo.o -o foo"
+  Args.ClaimAllArgs(options::OPT_emit_llvm);
+  // and for "clang -w foo.o -o foo". Other warning options are already
+  // handled somewhere else.
+  Args.ClaimAllArgs(options::OPT_w);
+
+  if (!D.SysRoot.empty())
+    CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
+
+  if (Args.hasArg(options::OPT_s))
+    CmdArgs.push_back("-s");
+
+  CmdArgs.push_back("-m");
+  if (TC.getArch() == llvm::Triple::x86)
+    CmdArgs.push_back("i386pe");
+  if (TC.getArch() == llvm::Triple::x86_64)
+    CmdArgs.push_back("i386pep");
+
+  if (Args.hasArg(options::OPT_mwindows)) {
+    CmdArgs.push_back("--subsystem");
+    CmdArgs.push_back("windows");
+  } else if (Args.hasArg(options::OPT_mconsole)) {
+    CmdArgs.push_back("--subsystem");
+    CmdArgs.push_back("console");
+  }
+
+  if (Args.hasArg(options::OPT_static))
+    CmdArgs.push_back("-Bstatic");
+  else {
+    if (Args.hasArg(options::OPT_mdll))
+      CmdArgs.push_back("--dll");
+    else if (Args.hasArg(options::OPT_shared))
+      CmdArgs.push_back("--shared");
+    CmdArgs.push_back("-Bdynamic");
+    if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) {
+      CmdArgs.push_back("-e");
+      if (TC.getArch() == llvm::Triple::x86)
+        CmdArgs.push_back("_DllMainCRTStartup@12");
+      else
+        CmdArgs.push_back("DllMainCRTStartup");
+      CmdArgs.push_back("--enable-auto-image-base");
+    }
+  }
+
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(Output.getFilename());
+
+  Args.AddAllArgs(CmdArgs, options::OPT_e);
+  // FIXME: add -N, -n flags
+  Args.AddLastArg(CmdArgs, options::OPT_r);
+  Args.AddLastArg(CmdArgs, options::OPT_s);
+  Args.AddLastArg(CmdArgs, options::OPT_t);
+  Args.AddAllArgs(CmdArgs, options::OPT_u_Group);
+  Args.AddLastArg(CmdArgs, options::OPT_Z_Flag);
+
+  if (!Args.hasArg(options::OPT_nostdlib) &&
+      !Args.hasArg(options::OPT_nostartfiles)) {
+    if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_mdll)) {
+      CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("dllcrt2.o")));
+    } else {
+      if (Args.hasArg(options::OPT_municode))
+        CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2u.o")));
+      else
+        CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt2.o")));
+    }
+    if (Args.hasArg(options::OPT_pg))
+      CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("gcrt2.o")));
+    CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtbegin.o")));
+  }
+
+  Args.AddAllArgs(CmdArgs, options::OPT_L);
+  const ToolChain::path_list Paths = TC.getFilePaths();
+  for (const auto &Path : Paths)
+    CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + Path));
+
+  AddLinkerInputs(TC, Inputs, Args, CmdArgs);
+
+  // TODO: Add ASan stuff here
+
+  // TODO: Add profile stuff here
+
+  if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib) &&
+      !Args.hasArg(options::OPT_nodefaultlibs)) {
+    bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
+                               !Args.hasArg(options::OPT_static);
+    if (OnlyLibstdcxxStatic)
+      CmdArgs.push_back("-Bstatic");
+    TC.AddCXXStdlibLibArgs(Args, CmdArgs);
+    if (OnlyLibstdcxxStatic)
+      CmdArgs.push_back("-Bdynamic");
+  }
+
+  if (!Args.hasArg(options::OPT_nostdlib)) {
+    if (!Args.hasArg(options::OPT_nodefaultlibs)) {
+      if (Args.hasArg(options::OPT_static))
+        CmdArgs.push_back("--start-group");
+
+      if (Args.hasArg(options::OPT_fstack_protector) ||
+          Args.hasArg(options::OPT_fstack_protector_strong) ||
+          Args.hasArg(options::OPT_fstack_protector_all)) {
+        CmdArgs.push_back("-lssp_nonshared");
+        CmdArgs.push_back("-lssp");
+      }
+      if (Args.hasArg(options::OPT_fopenmp))
+        CmdArgs.push_back("-lgomp");
+
+      AddLibGCC(Args, CmdArgs);
+
+      if (Args.hasArg(options::OPT_pg))
+        CmdArgs.push_back("-lgmon");
+
+      // FIXME: what to do about pthreads library?
+      // Currently required for OpenMP and posix-threading libgcc, 
+      // does not exists in mingw.org.
+      //CmdArgs.push_back("-lpthread");
+
+      // add system libraries
+      if (Args.hasArg(options::OPT_mwindows)) {
+        CmdArgs.push_back("-lgdi32");
+        CmdArgs.push_back("-lcomdlg32");
+      }
+      CmdArgs.push_back("-ladvapi32");
+      CmdArgs.push_back("-lshell32");
+      CmdArgs.push_back("-luser32");
+      CmdArgs.push_back("-lkernel32");
+
+      if (Args.hasArg(options::OPT_static))
+        CmdArgs.push_back("--end-group");
+      else
+        AddLibGCC(Args, CmdArgs);
+    }
+
+    if (!Args.hasArg(options::OPT_nostartfiles)) {
+      // Add crtfastmath.o if available and fast math is enabled.
+      TC.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
+
+      CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crtend.o")));
+    }
+  }
+  const char *Exec = Args.MakeArgString(TC.GetProgramPath("ld"));
+  C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
+}
 
 /// XCore Tools
 // We pass assemble and link construction to the xcc tool.
 
-void XCore::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                       const InputInfo &Output,
-                                       const InputInfoList &Inputs,
-                                       const ArgList &Args,
-                                       const char *LinkingOutput) const {
+void XCore::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                    const InputInfo &Output,
+                                    const InputInfoList &Inputs,
+                                    const ArgList &Args,
+                                    const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
   ArgStringList CmdArgs;
 
@@ -8884,8 +9027,7 @@ void XCore::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
                    false))
     CmdArgs.push_back("-fverbose-asm");
 
-  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
-                       options::OPT_Xassembler);
+  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
 
   for (const auto &II : Inputs)
     CmdArgs.push_back(II.getFilename());
@@ -8894,11 +9036,11 @@ void XCore::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void XCore::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                   const InputInfo &Output,
-                                   const InputInfoList &Inputs,
-                                   const ArgList &Args,
-                                   const char *LinkingOutput) const {
+void XCore::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                 const InputInfo &Output,
+                                 const InputInfoList &Inputs,
+                                 const ArgList &Args,
+                                 const char *LinkingOutput) const {
   ArgStringList CmdArgs;
 
   if (Output.isFilename()) {
@@ -8920,11 +9062,11 @@ void XCore::Link::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void CrossWindows::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
-                                          const InputInfo &Output,
-                                          const InputInfoList &Inputs,
-                                          const ArgList &Args,
-                                          const char *LinkingOutput) const {
+void CrossWindows::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                           const InputInfo &Output,
+                                           const InputInfoList &Inputs,
+                                           const ArgList &Args,
+                                           const char *LinkingOutput) const {
   claimNoWarnArgs(Args);
   const auto &TC =
       static_cast<const toolchains::CrossWindowsToolChain &>(getToolChain());
@@ -8932,7 +9074,8 @@ void CrossWindows::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   const char *Exec;
 
   switch (TC.getArch()) {
-  default: llvm_unreachable("unsupported architecture");
+  default:
+    llvm_unreachable("unsupported architecture");
   case llvm::Triple::arm:
   case llvm::Triple::thumb:
     break;
@@ -8958,11 +9101,11 @@ void CrossWindows::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void CrossWindows::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                      const InputInfo &Output,
-                                      const InputInfoList &Inputs,
-                                      const ArgList &Args,
-                                      const char *LinkingOutput) const {
+void CrossWindows::Linker::ConstructJob(Compilation &C, const JobAction &JA,
+                                        const InputInfo &Output,
+                                        const InputInfoList &Inputs,
+                                        const ArgList &Args,
+                                        const char *LinkingOutput) const {
   const auto &TC =
       static_cast<const toolchains::CrossWindowsToolChain &>(getToolChain());
   const llvm::Triple &T = TC.getTriple();
@@ -8991,7 +9134,8 @@ void CrossWindows::Link::ConstructJob(Compilation &C, const JobAction &JA,
 
   CmdArgs.push_back("-m");
   switch (TC.getArch()) {
-  default: llvm_unreachable("unsupported architecture");
+  default:
+    llvm_unreachable("unsupported architecture");
   case llvm::Triple::arm:
   case llvm::Triple::thumb:
     // FIXME: this is incorrect for WinCE
@@ -9008,7 +9152,8 @@ void CrossWindows::Link::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (Args.hasArg(options::OPT_shared)) {
     switch (T.getArch()) {
-    default: llvm_unreachable("unsupported architecture");
+    default:
+      llvm_unreachable("unsupported architecture");
     case llvm::Triple::arm:
     case llvm::Triple::thumb:
     case llvm::Triple::x86_64:
@@ -9098,11 +9243,11 @@ void CrossWindows::Link::ConstructJob(Compilation &C, const JobAction &JA,
   C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs));
 }
 
-void tools::SHAVE::Compile::ConstructJob(Compilation &C, const JobAction &JA,
-                                         const InputInfo &Output,
-                                         const InputInfoList &Inputs,
-                                         const ArgList &Args,
-                                         const char *LinkingOutput) const {
+void tools::SHAVE::Compiler::ConstructJob(Compilation &C, const JobAction &JA,
+                                          const InputInfo &Output,
+                                          const InputInfoList &Inputs,
+                                          const ArgList &Args,
+                                          const char *LinkingOutput) const {
 
   ArgStringList CmdArgs;
 
@@ -9143,12 +9288,11 @@ void tools::SHAVE::Compile::ConstructJob(Compilation &C, const JobAction &JA,
       llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Exec), CmdArgs));
 }
 
-void tools::SHAVE::Assemble::ConstructJob(Compilation &C,
-                                          const JobAction &JA,
-                                          const InputInfo &Output,
-                                          const InputInfoList &Inputs,
-                                          const ArgList &Args,
-                                          const char *LinkingOutput) const {
+void tools::SHAVE::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
+                                           const InputInfo &Output,
+                                           const InputInfoList &Inputs,
+                                           const ArgList &Args,
+                                           const char *LinkingOutput) const {
   ArgStringList CmdArgs;
 
   assert(Inputs.size() == 1);
diff --git a/lib/Driver/Tools.h b/lib/Driver/Tools.h
index 753f542..52ab731 100644
--- a/lib/Driver/Tools.h
+++ b/lib/Driver/Tools.h
@@ -32,11 +32,14 @@ namespace toolchains {
 namespace tools {
 
 namespace visualstudio {
-  class Compile;
+class Compiler;
 }
 
 using llvm::opt::ArgStringList;
 
+SmallString<128> getCompilerRT(const ToolChain &TC, StringRef Component,
+                               bool Shared = false);
+
   /// \brief Clang compiler tool.
   class LLVM_LIBRARY_VISIBILITY Clang : public Tool {
   public:
@@ -86,11 +89,14 @@ using llvm::opt::ArgStringList;
     void AddClangCLArgs(const llvm::opt::ArgList &Args,
                         llvm::opt::ArgStringList &CmdArgs) const;
 
-    visualstudio::Compile *getCLFallback() const;
+    visualstudio::Compiler *getCLFallback() const;
 
-    mutable std::unique_ptr<visualstudio::Compile> CLFallback;
+    mutable std::unique_ptr<visualstudio::Compiler> CLFallback;
 
   public:
+    // CAUTION! The first constructor argument ("clang") is not arbitrary,
+    // as it is for other tools. Some operations on a Tool actually test
+    // whether that tool is Clang based on the Tool's Name as a string.
     Clang(const ToolChain &TC) : Tool("clang", "clang frontend", TC, RF_Full) {}
 
     bool hasGoodDiagnostics() const override { return true; }
@@ -124,7 +130,7 @@ using llvm::opt::ArgStringList;
 
   /// \brief Base class for all GNU tools that provide the same behavior when
   /// it comes to response files support
-  class GnuTool : public Tool {
+  class LLVM_LIBRARY_VISIBILITY GnuTool : public Tool {
     virtual void anchor();
 
   public:
@@ -148,14 +154,14 @@ namespace gcc {
     /// RenderExtraToolArgs - Render any arguments necessary to force
     /// the particular tool mode.
     virtual void
-        RenderExtraToolArgs(const JobAction &JA,
-                            llvm::opt::ArgStringList &CmdArgs) const = 0;
+    RenderExtraToolArgs(const JobAction &JA,
+                        llvm::opt::ArgStringList &CmdArgs) const = 0;
   };
 
-  class LLVM_LIBRARY_VISIBILITY Preprocess : public Common {
+  class LLVM_LIBRARY_VISIBILITY Preprocessor : public Common {
   public:
-    Preprocess(const ToolChain &TC) : Common("gcc::Preprocess",
-                                             "gcc preprocessor", TC) {}
+    Preprocessor(const ToolChain &TC)
+        : Common("gcc::Preprocessor", "gcc preprocessor", TC) {}
 
     bool hasGoodDiagnostics() const override { return true; }
     bool hasIntegratedCPP() const override { return false; }
@@ -164,10 +170,10 @@ namespace gcc {
                              llvm::opt::ArgStringList &CmdArgs) const override;
   };
 
-  class LLVM_LIBRARY_VISIBILITY Compile : public Common  {
+  class LLVM_LIBRARY_VISIBILITY Compiler : public Common {
   public:
-    Compile(const ToolChain &TC) : Common("gcc::Compile",
-                                          "gcc frontend", TC) {}
+    Compiler(const ToolChain &TC)
+        : Common("gcc::Compiler", "gcc frontend", TC) {}
 
     bool hasGoodDiagnostics() const override { return true; }
     bool hasIntegratedCPP() const override { return true; }
@@ -176,10 +182,10 @@ namespace gcc {
                              llvm::opt::ArgStringList &CmdArgs) const override;
   };
 
-  class LLVM_LIBRARY_VISIBILITY Link : public Common  {
+  class LLVM_LIBRARY_VISIBILITY Linker : public Common {
   public:
-    Link(const ToolChain &TC) : Common("gcc::Link",
-                                       "linker (via gcc)", TC) {}
+    Linker(const ToolChain &TC)
+        : Common("gcc::Linker", "linker (via gcc)", TC) {}
 
     bool hasIntegratedCPP() const override { return false; }
     bool isLinkJob() const override { return true; }
@@ -190,14 +196,15 @@ namespace gcc {
 } // end namespace gcc
 
 namespace hexagon {
-  // For Hexagon, we do not need to instantiate tools for PreProcess, PreCompile and Compile.
-  // We simply use "clang -cc1" for those actions.
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool {
-  public:
-    Assemble(const ToolChain &TC) : GnuTool("hexagon::Assemble",
-      "hexagon-as", TC) {}
+// For Hexagon, we do not need to instantiate tools for PreProcess, PreCompile
+// and Compile.
+// We simply use "clang -cc1" for those actions.
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("hexagon::Assembler", "hexagon-as", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
     void RenderExtraToolArgs(const JobAction &JA,
                              llvm::opt::ArgStringList &CmdArgs) const;
@@ -205,14 +212,13 @@ namespace hexagon {
                       const InputInfo &Output, const InputInfoList &Inputs,
                       const llvm::opt::ArgList &TCArgs,
                       const char *LinkingOutput) const override;
-  };
+};
 
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool {
-  public:
-    Link(const ToolChain &TC) : GnuTool("hexagon::Link",
-      "hexagon-ld", TC) {}
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("hexagon::Linker", "hexagon-ld", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
     bool isLinkJob() const override { return true; }
 
     virtual void RenderExtraToolArgs(const JobAction &JA,
@@ -258,11 +264,11 @@ namespace ppc {
   bool hasPPCAbiArg(const llvm::opt::ArgList &Args, const char *Value);
 }
 
-  /// cloudabi -- Directly call GNU Binutils linker
+/// cloudabi -- Directly call GNU Binutils linker
 namespace cloudabi {
-class LLVM_LIBRARY_VISIBILITY Link : public GnuTool {
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
 public:
-  Link(const ToolChain &TC) : GnuTool("cloudabi::Link", "linker", TC) {}
+  Linker(const ToolChain &TC) : GnuTool("cloudabi::Linker", "linker", TC) {}
 
   bool hasIntegratedCPP() const override { return false; }
   bool isLinkJob() const override { return true; }
@@ -289,19 +295,19 @@ namespace darwin {
     }
 
   public:
-  MachOTool(
-      const char *Name, const char *ShortName, const ToolChain &TC,
-      ResponseFileSupport ResponseSupport = RF_None,
-      llvm::sys::WindowsEncodingMethod ResponseEncoding = llvm::sys::WEM_UTF8,
-      const char *ResponseFlag = "@")
-      : Tool(Name, ShortName, TC, ResponseSupport, ResponseEncoding,
-             ResponseFlag) {}
+    MachOTool(
+        const char *Name, const char *ShortName, const ToolChain &TC,
+        ResponseFileSupport ResponseSupport = RF_None,
+        llvm::sys::WindowsEncodingMethod ResponseEncoding = llvm::sys::WEM_UTF8,
+        const char *ResponseFlag = "@")
+        : Tool(Name, ShortName, TC, ResponseSupport, ResponseEncoding,
+               ResponseFlag) {}
   };
 
-  class LLVM_LIBRARY_VISIBILITY Assemble : public MachOTool  {
+  class LLVM_LIBRARY_VISIBILITY Assembler : public MachOTool {
   public:
-    Assemble(const ToolChain &TC) : MachOTool("darwin::Assemble",
-                                              "assembler", TC) {}
+    Assembler(const ToolChain &TC)
+        : MachOTool("darwin::Assembler", "assembler", TC) {}
 
     bool hasIntegratedCPP() const override { return false; }
 
@@ -311,16 +317,16 @@ namespace darwin {
                       const char *LinkingOutput) const override;
   };
 
-  class LLVM_LIBRARY_VISIBILITY Link : public MachOTool  {
+  class LLVM_LIBRARY_VISIBILITY Linker : public MachOTool {
     bool NeedsTempPath(const InputInfoList &Inputs) const;
     void AddLinkArgs(Compilation &C, const llvm::opt::ArgList &Args,
                      llvm::opt::ArgStringList &CmdArgs,
                      const InputInfoList &Inputs) const;
 
   public:
-    Link(const ToolChain &TC) : MachOTool("darwin::Link", "linker", TC,
-                                          RF_FileList, llvm::sys::WEM_UTF8,
-                                          "-filelist") {}
+    Linker(const ToolChain &TC)
+        : MachOTool("darwin::Linker", "linker", TC, RF_FileList,
+                    llvm::sys::WEM_UTF8, "-filelist") {}
 
     bool hasIntegratedCPP() const override { return false; }
     bool isLinkJob() const override { return true; }
@@ -373,27 +379,26 @@ namespace darwin {
 
 }
 
-  /// openbsd -- Directly call GNU Binutils assembler and linker
+/// openbsd -- Directly call GNU Binutils assembler and linker
 namespace openbsd {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool  {
-  public:
-    Assemble(const ToolChain &TC) : GnuTool("openbsd::Assemble", "assembler",
-                                         TC) {}
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("openbsd::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output,
-                      const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool  {
-  public:
-    Link(const ToolChain &TC) : GnuTool("openbsd::Link", "linker", TC) {}
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("openbsd::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
@@ -402,26 +407,26 @@ namespace openbsd {
   };
 } // end namespace openbsd
 
-  /// bitrig -- Directly call GNU Binutils assembler and linker
+/// bitrig -- Directly call GNU Binutils assembler and linker
 namespace bitrig {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool  {
-  public:
-    Assemble(const ToolChain &TC) : GnuTool("bitrig::Assemble", "assembler",
-                                         TC) {}
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("bitrig::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output, const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool  {
-  public:
-    Link(const ToolChain &TC) : GnuTool("bitrig::Link", "linker", TC) {}
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("bitrig::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
@@ -430,26 +435,26 @@ namespace bitrig {
   };
 } // end namespace bitrig
 
-  /// freebsd -- Directly call GNU Binutils assembler and linker
+/// freebsd -- Directly call GNU Binutils assembler and linker
 namespace freebsd {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool  {
-  public:
-    Assemble(const ToolChain &TC) : GnuTool("freebsd::Assemble", "assembler",
-                                         TC) {}
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("freebsd::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output, const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool  {
-  public:
-    Link(const ToolChain &TC) : GnuTool("freebsd::Link", "linker", TC) {}
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("freebsd::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
@@ -458,29 +463,28 @@ namespace freebsd {
   };
 } // end namespace freebsd
 
-  /// netbsd -- Directly call GNU Binutils assembler and linker
+/// netbsd -- Directly call GNU Binutils assembler and linker
 namespace netbsd {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool  {
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
 
-  public:
-    Assemble(const ToolChain &TC)
-      : GnuTool("netbsd::Assemble", "assembler", TC) {}
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("netbsd::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output, const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool  {
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
 
-  public:
-    Link(const ToolChain &TC)
-      : GnuTool("netbsd::Link", "linker", TC) {}
+public:
+  Linker(const ToolChain &TC) : GnuTool("netbsd::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
@@ -489,26 +493,25 @@ namespace netbsd {
   };
 } // end namespace netbsd
 
-  /// Directly call GNU Binutils' assembler and linker.
+/// Directly call GNU Binutils' assembler and linker.
 namespace gnutools {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool  {
-  public:
-    Assemble(const ToolChain &TC) : GnuTool("GNU::Assemble", "assembler", TC) {}
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC) : GnuTool("GNU::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output,
-                      const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool  {
-  public:
-    Link(const ToolChain &TC) : GnuTool("GNU::Link", "linker", TC) {}
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("GNU::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output,
@@ -516,22 +519,21 @@ namespace gnutools {
                       const llvm::opt::ArgList &TCArgs,
                       const char *LinkingOutput) const override;
   };
-}
+  }
 
-namespace nacltools {
-  class LLVM_LIBRARY_VISIBILITY AssembleARM : public gnutools::Assemble  {
+  namespace nacltools {
+  class LLVM_LIBRARY_VISIBILITY AssemblerARM : public gnutools::Assembler {
   public:
-    AssembleARM(const ToolChain &TC) : gnutools::Assemble(TC) {}
+    AssemblerARM(const ToolChain &TC) : gnutools::Assembler(TC) {}
 
     void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output,
-                      const InputInfoList &Inputs,
+                      const InputInfo &Output, const InputInfoList &Inputs,
                       const llvm::opt::ArgList &TCArgs,
                       const char *LinkingOutput) const override;
   };
-  class LLVM_LIBRARY_VISIBILITY Link : public Tool  {
+  class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
   public:
-    Link(const ToolChain &TC) : Tool("NaCl::Link", "linker", TC) {}
+    Linker(const ToolChain &TC) : Tool("NaCl::Linker", "linker", TC) {}
 
     bool hasIntegratedCPP() const override { return false; }
     bool isLinkJob() const override { return true; }
@@ -544,27 +546,26 @@ namespace nacltools {
   };
 }
 
-  /// minix -- Directly call GNU Binutils assembler and linker
+/// minix -- Directly call GNU Binutils assembler and linker
 namespace minix {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool  {
-  public:
-    Assemble(const ToolChain &TC) : GnuTool("minix::Assemble", "assembler",
-                                         TC) {}
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("minix::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output,
-                      const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool  {
-  public:
-    Link(const ToolChain &TC) : GnuTool("minix::Link", "linker", TC) {}
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("minix::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output,
@@ -574,26 +575,26 @@ namespace minix {
   };
 } // end namespace minix
 
-  /// solaris -- Directly call Solaris assembler and linker
+/// solaris -- Directly call Solaris assembler and linker
 namespace solaris {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public Tool  {
-  public:
-    Assemble(const ToolChain &TC) : Tool("solaris::Assemble", "assembler",
-                                         TC) {}
+class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
+public:
+  Assembler(const ToolChain &TC)
+      : Tool("solaris::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output, const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public Tool  {
-  public:
-    Link(const ToolChain &TC) : Tool("solaris::Link", "linker", TC) {}
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
+public:
+  Linker(const ToolChain &TC) : Tool("solaris::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
@@ -602,26 +603,26 @@ namespace solaris {
   };
 } // end namespace solaris
 
-  /// dragonfly -- Directly call GNU Binutils assembler and linker
+/// dragonfly -- Directly call GNU Binutils assembler and linker
 namespace dragonfly {
-  class LLVM_LIBRARY_VISIBILITY Assemble : public GnuTool  {
-  public:
-    Assemble(const ToolChain &TC) : GnuTool("dragonfly::Assemble", "assembler",
-                                         TC) {}
+class LLVM_LIBRARY_VISIBILITY Assembler : public GnuTool {
+public:
+  Assembler(const ToolChain &TC)
+      : GnuTool("dragonfly::Assembler", "assembler", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
 
-    void ConstructJob(Compilation &C, const JobAction &JA,
-                      const InputInfo &Output, const InputInfoList &Inputs,
-                      const llvm::opt::ArgList &TCArgs,
-                      const char *LinkingOutput) const override;
-  };
-  class LLVM_LIBRARY_VISIBILITY Link : public GnuTool  {
-  public:
-    Link(const ToolChain &TC) : GnuTool("dragonfly::Link", "linker", TC) {}
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
+public:
+  Linker(const ToolChain &TC) : GnuTool("dragonfly::Linker", "linker", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output,
@@ -633,30 +634,31 @@ namespace dragonfly {
 
 /// Visual studio tools.
 namespace visualstudio {
-  VersionTuple getMSVCVersion(const Driver *D, const llvm::Triple &Triple,
-                              const llvm::opt::ArgList &Args,
-                              bool IsWindowsMSVC);
+VersionTuple getMSVCVersion(const Driver *D, const llvm::Triple &Triple,
+                            const llvm::opt::ArgList &Args, bool IsWindowsMSVC);
 
-  class LLVM_LIBRARY_VISIBILITY Link : public Tool {
-  public:
-    Link(const ToolChain &TC) : Tool("visualstudio::Link", "linker", TC,
-                                     RF_Full, llvm::sys::WEM_UTF16) {}
+class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
+public:
+  Linker(const ToolChain &TC)
+      : Tool("visualstudio::Linker", "linker", TC, RF_Full,
+             llvm::sys::WEM_UTF16) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
     bool isLinkJob() const override { return true; }
 
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
                       const llvm::opt::ArgList &TCArgs,
                       const char *LinkingOutput) const override;
-  };
+};
 
-  class LLVM_LIBRARY_VISIBILITY Compile : public Tool {
-  public:
-    Compile(const ToolChain &TC) : Tool("visualstudio::Compile", "compiler", TC,
-                                        RF_Full, llvm::sys::WEM_UTF16) {}
+class LLVM_LIBRARY_VISIBILITY Compiler : public Tool {
+public:
+  Compiler(const ToolChain &TC)
+      : Tool("visualstudio::Compiler", "compiler", TC, RF_Full,
+             llvm::sys::WEM_UTF16) {}
 
-    bool hasIntegratedAssembler() const override { return true; }
+  bool hasIntegratedAssembler() const override { return true; }
     bool hasIntegratedCPP() const override { return true; }
     bool isLinkJob() const override { return false; }
 
@@ -673,58 +675,90 @@ namespace visualstudio {
   };
 } // end namespace visualstudio
 
+/// MinGW -- Directly call GNU Binutils assembler and linker
+namespace MinGW {
+class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
+public:
+  Assembler(const ToolChain &TC) : Tool("MinGW::Assemble", "assembler", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
+class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
+public:
+  Linker(const ToolChain &TC) : Tool("MinGW::Linker", "linker", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+  bool isLinkJob() const override { return true; }
+
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+
+private:
+  void AddLibGCC(const llvm::opt::ArgList &Args, ArgStringList &CmdArgs) const;
+};
+} // end namespace MinGW
+
 namespace arm {
   StringRef getARMFloatABI(const Driver &D, const llvm::opt::ArgList &Args,
                          const llvm::Triple &Triple);
 }
 namespace XCore {
-  // For XCore, we do not need to instantiate tools for PreProcess, PreCompile and Compile.
-  // We simply use "clang -cc1" for those actions.
-  class LLVM_LIBRARY_VISIBILITY Assemble : public Tool {
-  public:
-    Assemble(const ToolChain &TC) : Tool("XCore::Assemble",
-      "XCore-as", TC) {}
+// For XCore, we do not need to instantiate tools for PreProcess, PreCompile and
+// Compile.
+// We simply use "clang -cc1" for those actions.
+class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
+public:
+  Assembler(const ToolChain &TC) : Tool("XCore::Assembler", "XCore-as", TC) {}
 
-    bool hasIntegratedCPP() const override { return false; }
+  bool hasIntegratedCPP() const override { return false; }
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
+class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
+public:
+  Linker(const ToolChain &TC) : Tool("XCore::Linker", "XCore-ld", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+    bool isLinkJob() const override { return true; }
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
                       const llvm::opt::ArgList &TCArgs,
                       const char *LinkingOutput) const override;
   };
+  } // end namespace XCore.
 
-  class LLVM_LIBRARY_VISIBILITY Link : public Tool {
+  namespace CrossWindows {
+  class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
   public:
-    Link(const ToolChain &TC) : Tool("XCore::Link",
-      "XCore-ld", TC) {}
+    Assembler(const ToolChain &TC)
+        : Tool("CrossWindows::Assembler", "as", TC) {}
 
     bool hasIntegratedCPP() const override { return false; }
-    bool isLinkJob() const override { return true; }
+
     void ConstructJob(Compilation &C, const JobAction &JA,
                       const InputInfo &Output, const InputInfoList &Inputs,
                       const llvm::opt::ArgList &TCArgs,
                       const char *LinkingOutput) const override;
   };
-} // end namespace XCore.
-
-namespace CrossWindows {
-class LLVM_LIBRARY_VISIBILITY Assemble : public Tool {
-public:
-  Assemble(const ToolChain &TC) : Tool("CrossWindows::Assemble", "as", TC) { }
-
-  bool hasIntegratedCPP() const override { return false; }
 
-  void ConstructJob(Compilation &C, const JobAction &JA,
-                    const InputInfo &Output, const InputInfoList &Inputs,
-                    const llvm::opt::ArgList &TCArgs,
-                    const char *LinkingOutput) const override;
-};
-
-class LLVM_LIBRARY_VISIBILITY Link : public Tool {
-public:
-  Link(const ToolChain &TC) : Tool("CrossWindows::Link", "ld", TC, RF_Full) {}
+  class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
+  public:
+    Linker(const ToolChain &TC)
+        : Tool("CrossWindows::Linker", "ld", TC, RF_Full) {}
 
-  bool hasIntegratedCPP() const override { return false; }
-  bool isLinkJob() const override { return true; }
+    bool hasIntegratedCPP() const override { return false; }
+    bool isLinkJob() const override { return true; }
 
   void ConstructJob(Compilation &C, const JobAction &JA,
                     const InputInfo &Output, const InputInfoList &Inputs,
@@ -735,9 +769,9 @@ public:
 
 /// SHAVE tools -- Directly call moviCompile and moviAsm
 namespace SHAVE {
-class LLVM_LIBRARY_VISIBILITY Compile : public Tool {
+class LLVM_LIBRARY_VISIBILITY Compiler : public Tool {
 public:
-  Compile(const ToolChain &TC) : Tool("moviCompile", "movicompile", TC) {}
+  Compiler(const ToolChain &TC) : Tool("moviCompile", "movicompile", TC) {}
 
   bool hasIntegratedCPP() const override { return true; }
 
@@ -747,9 +781,9 @@ public:
                     const char *LinkingOutput) const override;
 };
 
-class LLVM_LIBRARY_VISIBILITY Assemble : public Tool {
+class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
 public:
-  Assemble(const ToolChain &TC) : Tool("moviAsm", "moviAsm", TC) {}
+  Assembler(const ToolChain &TC) : Tool("moviAsm", "moviAsm", TC) {}
 
   bool hasIntegratedCPP() const override { return false; } // not sure.
 
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 2357abd..ed01d68 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -126,7 +126,8 @@ bool ContinuationIndenter::canBreak(const LineState &State) {
   // Don't break after very short return types (e.g. "void") as that is often
   // unexpected.
   if (Current.is(TT_FunctionDeclarationName) &&
-      !Style.AlwaysBreakAfterDefinitionReturnType && State.Column < 6)
+      Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_None &&
+      State.Column < 6)
     return false;
 
   return !State.Stack.back().NoLineBreak;
@@ -815,7 +816,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
     //       ParameterToInnerFunction));
     if (*I > prec::Unknown)
       NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
-    if (*I != prec::Conditional)
+    if (*I != prec::Conditional && !Current.is(TT_UnaryOperator))
       NewParenState.StartOfFunctionCall = State.Column;
 
     // Always indent conditional expressions. Never indent expression where
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 0feeaa0..2bbe4c6 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -99,6 +99,18 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
   }
 };
 
+template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
+  static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
+    IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
+    IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
+    IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
+
+    // For backward compatibility.
+    IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
+    IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
+  }
+};
+
 template <>
 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
   static void enumeration(IO &IO,
@@ -169,55 +181,70 @@ template <> struct MappingTraits<FormatStyle> {
       }
     }
 
+    // For backward compatibility.
+    if (!IO.outputting()) {
+      IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
+      IO.mapOptional("IndentFunctionDeclarationAfterType",
+                     Style.IndentWrappedFunctionNames);
+      IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
+      IO.mapOptional("SpaceAfterControlStatementKeyword",
+                     Style.SpaceBeforeParens);
+    }
+
     IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
     IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
+    IO.mapOptional("AlignConsecutiveAssignments",
+                   Style.AlignConsecutiveAssignments);
     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
     IO.mapOptional("AlignOperands", Style.AlignOperands);
     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
-    IO.mapOptional("AlignConsecutiveAssignments",
-                   Style.AlignConsecutiveAssignments);
     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
                    Style.AllowAllParametersOfDeclarationOnNextLine);
     IO.mapOptional("AllowShortBlocksOnASingleLine",
                    Style.AllowShortBlocksOnASingleLine);
     IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
                    Style.AllowShortCaseLabelsOnASingleLine);
+    IO.mapOptional("AllowShortFunctionsOnASingleLine",
+                   Style.AllowShortFunctionsOnASingleLine);
     IO.mapOptional("AllowShortIfStatementsOnASingleLine",
                    Style.AllowShortIfStatementsOnASingleLine);
     IO.mapOptional("AllowShortLoopsOnASingleLine",
                    Style.AllowShortLoopsOnASingleLine);
-    IO.mapOptional("AllowShortFunctionsOnASingleLine",
-                   Style.AllowShortFunctionsOnASingleLine);
     IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
                    Style.AlwaysBreakAfterDefinitionReturnType);
-    IO.mapOptional("AlwaysBreakTemplateDeclarations",
-                   Style.AlwaysBreakTemplateDeclarations);
     IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
                    Style.AlwaysBreakBeforeMultilineStrings);
+    IO.mapOptional("AlwaysBreakTemplateDeclarations",
+                   Style.AlwaysBreakTemplateDeclarations);
+    IO.mapOptional("BinPackArguments", Style.BinPackArguments);
+    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
     IO.mapOptional("BreakBeforeBinaryOperators",
                    Style.BreakBeforeBinaryOperators);
+    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
     IO.mapOptional("BreakBeforeTernaryOperators",
                    Style.BreakBeforeTernaryOperators);
     IO.mapOptional("BreakConstructorInitializersBeforeComma",
                    Style.BreakConstructorInitializersBeforeComma);
-    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
-    IO.mapOptional("BinPackArguments", Style.BinPackArguments);
     IO.mapOptional("ColumnLimit", Style.ColumnLimit);
+    IO.mapOptional("CommentPragmas", Style.CommentPragmas);
     IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
                    Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
     IO.mapOptional("ConstructorInitializerIndentWidth",
                    Style.ConstructorInitializerIndentWidth);
+    IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
+    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
     IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
+    IO.mapOptional("DisableFormat", Style.DisableFormat);
     IO.mapOptional("ExperimentalAutoDetectBinPacking",
                    Style.ExperimentalAutoDetectBinPacking);
+    IO.mapOptional("ForEachMacros", Style.ForEachMacros);
     IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
+    IO.mapOptional("IndentWidth", Style.IndentWidth);
     IO.mapOptional("IndentWrappedFunctionNames",
                    Style.IndentWrappedFunctionNames);
-    IO.mapOptional("IndentFunctionDeclarationAfterType",
-                   Style.IndentWrappedFunctionNames);
-    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
     IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
                    Style.KeepEmptyLinesAtTheStartOfBlocks);
+    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
     IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
     IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
     IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
@@ -226,45 +253,30 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
                    Style.PenaltyBreakBeforeFirstCallParameter);
     IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
-    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
     IO.mapOptional("PenaltyBreakFirstLessLess",
                    Style.PenaltyBreakFirstLessLess);
+    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
     IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
     IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
                    Style.PenaltyReturnTypeOnItsOwnLine);
     IO.mapOptional("PointerAlignment", Style.PointerAlignment);
+    IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
+    IO.mapOptional("SpaceBeforeAssignmentOperators",
+                   Style.SpaceBeforeAssignmentOperators);
+    IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
+    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
     IO.mapOptional("SpacesBeforeTrailingComments",
                    Style.SpacesBeforeTrailingComments);
-    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
-    IO.mapOptional("Standard", Style.Standard);
-    IO.mapOptional("IndentWidth", Style.IndentWidth);
-    IO.mapOptional("TabWidth", Style.TabWidth);
-    IO.mapOptional("UseTab", Style.UseTab);
-    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
-    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
-    IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
     IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
-    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
-    IO.mapOptional("SpacesInCStyleCastParentheses",
-                   Style.SpacesInCStyleCastParentheses);
-    IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
     IO.mapOptional("SpacesInContainerLiterals",
                    Style.SpacesInContainerLiterals);
-    IO.mapOptional("SpaceBeforeAssignmentOperators",
-                   Style.SpaceBeforeAssignmentOperators);
-    IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
-    IO.mapOptional("CommentPragmas", Style.CommentPragmas);
-    IO.mapOptional("ForEachMacros", Style.ForEachMacros);
-
-    // For backward compatibility.
-    if (!IO.outputting()) {
-      IO.mapOptional("SpaceAfterControlStatementKeyword",
-                     Style.SpaceBeforeParens);
-      IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
-      IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
-    }
-    IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
-    IO.mapOptional("DisableFormat", Style.DisableFormat);
+    IO.mapOptional("SpacesInCStyleCastParentheses",
+                   Style.SpacesInCStyleCastParentheses);
+    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
+    IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
+    IO.mapOptional("Standard", Style.Standard);
+    IO.mapOptional("TabWidth", Style.TabWidth);
+    IO.mapOptional("UseTab", Style.UseTab);
   }
 };
 
@@ -338,7 +350,7 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
   LLVMStyle.AllowShortLoopsOnASingleLine = false;
-  LLVMStyle.AlwaysBreakAfterDefinitionReturnType = false;
+  LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
   LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
   LLVMStyle.AlwaysBreakTemplateDeclarations = false;
   LLVMStyle.BinPackParameters = true;
@@ -462,15 +474,19 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
 FormatStyle getMozillaStyle() {
   FormatStyle MozillaStyle = getLLVMStyle();
   MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
+  MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
+  MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
+      FormatStyle::DRTBS_TopLevel;
+  MozillaStyle.AlwaysBreakTemplateDeclarations = true;
+  MozillaStyle.BreakConstructorInitializersBeforeComma = true;
+  MozillaStyle.ConstructorInitializerIndentWidth = 2;
+  MozillaStyle.ContinuationIndentWidth = 2;
   MozillaStyle.Cpp11BracedListStyle = false;
-  MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
-  MozillaStyle.DerivePointerAlignment = true;
   MozillaStyle.IndentCaseLabels = true;
   MozillaStyle.ObjCSpaceAfterProperty = true;
   MozillaStyle.ObjCSpaceBeforeProtocolList = false;
   MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
   MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
-  MozillaStyle.Standard = FormatStyle::LS_Cpp03;
   return MozillaStyle;
 }
 
@@ -496,7 +512,7 @@ FormatStyle getWebKitStyle() {
 
 FormatStyle getGNUStyle() {
   FormatStyle Style = getLLVMStyle();
-  Style.AlwaysBreakAfterDefinitionReturnType = true;
+  Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
   Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
   Style.BreakBeforeBraces = FormatStyle::BS_GNU;
   Style.BreakBeforeTernaryOperators = true;
@@ -738,35 +754,62 @@ private:
   bool tryMergeJSRegexLiteral() {
     if (Tokens.size() < 2)
       return false;
+
+    // If this is a string literal with a slash inside, compute the slash's
+    // offset and try to find the beginning of the regex literal.
+    // Also look at tok::unknown, as it can be an unterminated char literal.
+    size_t SlashInStringPos = StringRef::npos;
+    if (Tokens.back()->isOneOf(tok::string_literal, tok::char_constant,
+                               tok::unknown)) {
+      // Start search from position 1 as otherwise, this is an unknown token
+      // for an unterminated /*-comment which is handled elsewhere.
+      SlashInStringPos = Tokens.back()->TokenText.find('/', 1);
+      if (SlashInStringPos == StringRef::npos)
+        return false;
+    }
+
     // If a regex literal ends in "\//", this gets represented by an unknown
     // token "\" and a comment.
     bool MightEndWithEscapedSlash =
         Tokens.back()->is(tok::comment) &&
         Tokens.back()->TokenText.startswith("//") &&
         Tokens[Tokens.size() - 2]->TokenText == "\\";
-    if (!MightEndWithEscapedSlash &&
+    if (!MightEndWithEscapedSlash && SlashInStringPos == StringRef::npos &&
         (Tokens.back()->isNot(tok::slash) ||
          (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
           Tokens[Tokens.size() - 2]->TokenText == "\\")))
       return false;
+
     unsigned TokenCount = 0;
-    unsigned LastColumn = Tokens.back()->OriginalColumn;
     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
       ++TokenCount;
-      if (I[0]->isOneOf(tok::slash, tok::slashequal) && I + 1 != E &&
-          (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
-                         tok::exclaim, tok::l_square, tok::colon, tok::comma,
-                         tok::question, tok::kw_return) ||
-           I[1]->isBinaryOperator())) {
+      auto Prev = I + 1;
+      while (Prev != E && Prev[0]->is(tok::comment))
+        ++Prev;
+      if (I[0]->isOneOf(tok::slash, tok::slashequal) &&
+          (Prev == E ||
+           ((Prev[0]->isOneOf(tok::l_paren, tok::semi, tok::l_brace,
+                              tok::r_brace, tok::exclaim, tok::l_square,
+                              tok::colon, tok::comma, tok::question,
+                              tok::kw_return) ||
+             Prev[0]->isBinaryOperator())))) {
+        unsigned LastColumn = Tokens.back()->OriginalColumn;
+        SourceLocation Loc = Tokens.back()->Tok.getLocation();
         if (MightEndWithEscapedSlash) {
           // This regex literal ends in '\//'. Skip past the '//' of the last
           // token and re-start lexing from there.
-          SourceLocation Loc = Tokens.back()->Tok.getLocation();
           resetLexer(SourceMgr.getFileOffset(Loc) + 2);
+        } else if (SlashInStringPos != StringRef::npos) {
+          // This regex literal ends in a string_literal with a slash inside.
+          // Calculate end column and reset lexer appropriately.
+          resetLexer(SourceMgr.getFileOffset(Loc) + SlashInStringPos + 1);
+          LastColumn += SlashInStringPos;
         }
         Tokens.resize(Tokens.size() - TokenCount);
         Tokens.back()->Tok.setKind(tok::unknown);
         Tokens.back()->Type = TT_RegexLiteral;
+        // Treat regex literals like other string_literals.
+        Tokens.back()->Tok.setKind(tok::string_literal);
         Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
         return true;
       }
@@ -801,7 +844,7 @@ private:
         EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
       ++TokenCount;
-      if (I[0]->NewlinesBefore > 0 || I[0]->IsMultiline)
+      if (I[0]->IsMultiline)
         IsMultiline = true;
 
       // If there was a preceding template string, this must be the start of a
@@ -817,9 +860,10 @@ private:
           EndColumnInFirstLine = I[0]->OriginalColumn + I[0]->ColumnWidth;
         // If the token has newlines, the token before it (if it exists) is the
         // rhs end of the previous line.
-        if (I[0]->NewlinesBefore > 0 && (I + 1 != E))
+        if (I[0]->NewlinesBefore > 0 && (I + 1 != E)) {
           EndColumnInFirstLine = I[1]->OriginalColumn + I[1]->ColumnWidth;
-
+          IsMultiline = true;
+        }
         continue;
       }
 
@@ -852,7 +896,8 @@ private:
         //     until here`;
         Tokens.back()->ColumnWidth =
             EndColumnInFirstLine - Tokens.back()->OriginalColumn;
-        Tokens.back()->LastLineColumnWidth = EndOriginalColumn;
+        // +1 for the ` itself.
+        Tokens.back()->LastLineColumnWidth = EndOriginalColumn + 1;
         Tokens.back()->IsMultiline = true;
       } else {
         // Token simply spans from start to end, +1 for the ` itself.
@@ -1044,7 +1089,6 @@ private:
           break;
         default:
           FormatTok->Type = TT_ImplicitStringLiteral;
-          ++Column;
           break;
         }
       }
@@ -1189,6 +1233,7 @@ private:
                         getFormattingLangOpts(Style), Buffer.begin(),
                         Buffer.begin() + Offset, Buffer.end()));
     Lex->SetKeepWhitespaceMode(true);
+    TrailingWhitespace = 0;
   }
 };
 
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 8d08c3d..0e1f14a 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -1008,7 +1008,7 @@ private:
     if (Tok.isNot(tok::identifier) || !Tok.Previous)
       return false;
 
-    if (Tok.Previous->is(TT_LeadingJavaAnnotation))
+    if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof))
       return false;
 
     // Skip "const" as it does not have an influence on whether this is a name.
@@ -1123,7 +1123,7 @@ private:
       return TT_UnaryOperator;
 
     const FormatToken *NextToken = Tok.getNextNonComment();
-    if (!NextToken ||
+    if (!NextToken || NextToken->is(tok::arrow) ||
         (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
       return TT_Unknown;
 
@@ -1336,6 +1336,10 @@ private:
         return 0;
       if (Current->is(TT_RangeBasedForLoopColon))
         return prec::Comma;
+      if ((Style.Language == FormatStyle::LK_Java ||
+           Style.Language == FormatStyle::LK_JavaScript) &&
+          Current->is(Keywords.kw_instanceof))
+        return prec::Relational;
       if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
         return Current->getPrecedence();
       if (Current->isOneOf(tok::period, tok::arrow))
@@ -1539,8 +1543,11 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
     Current->MustBreakBefore =
         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
 
-    if (Style.AlwaysBreakAfterDefinitionReturnType && InFunctionDecl &&
-        Current->is(TT_FunctionDeclarationName) &&
+    if ((Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All ||
+         (Style.AlwaysBreakAfterDefinitionReturnType ==
+              FormatStyle::DRTBS_TopLevel &&
+          Line.Level == 0)) &&
+        InFunctionDecl && Current->is(TT_FunctionDeclarationName) &&
         !Line.Last->isOneOf(tok::semi, tok::comment)) // Only for definitions.
       // FIXME: Line.Last points to other characters than tok::semi
       // and tok::lbrace.
@@ -1876,7 +1883,12 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
                                          const FormatToken &Right) {
   const FormatToken &Left = *Right.Previous;
-  if (Style.Language == FormatStyle::LK_Proto) {
+  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
+    return true; // Never ever merge two identifiers.
+  if (Style.Language == FormatStyle::LK_Cpp) {
+    if (Left.is(tok::kw_operator))
+      return Right.is(tok::coloncolon);
+  } else if (Style.Language == FormatStyle::LK_Proto) {
     if (Right.is(tok::period) &&
         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
                      Keywords.kw_repeated))
@@ -1913,8 +1925,6 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
         Right.is(TT_TemplateOpener))
       return true;
   }
-  if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
-    return true; // Never ever merge two identifiers.
   if (Left.is(TT_ImplicitStringLiteral))
     return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
   if (Line.Type == LT_ObjCMethodDecl) {
@@ -1937,8 +1947,6 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     return false;
   if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
     return true;
-  if (Left.is(tok::kw_operator))
-    return Right.is(tok::coloncolon);
   if (Right.is(TT_OverloadedOperatorLParen))
     return false;
   if (Right.is(tok::colon)) {
@@ -2128,6 +2136,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
     if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
       return false;
+    if (Left.is(TT_JsTypeColon))
+      return true;
   }
 
   if (Left.is(tok::at))
@@ -2234,7 +2244,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
     return true;
   if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
     return true;
-  if (Left.isBinaryOperator() && !Left.isOneOf(tok::arrowstar, tok::lessless) &&
+  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
+      !Left.isOneOf(tok::arrowstar, tok::lessless) &&
       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
        Left.getPrecedence() == prec::Assignment))
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index ea1ca39..c58e6bc 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -656,6 +656,16 @@ void UnwrappedLineParser::parseStructuralElement() {
       nextToken();
       addUnwrappedLine();
       return;
+    case tok::objc_autoreleasepool:
+      nextToken();
+      if (FormatTok->Tok.is(tok::l_brace)) {
+        if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
+            Style.BreakBeforeBraces == FormatStyle::BS_GNU)
+          addUnwrappedLine();
+        parseBlock(/*MustBeDeclaration=*/false);
+      }
+      addUnwrappedLine();
+      return;
     case tok::objc_try:
       // This branch isn't strictly necessary (the kw_try case below would
       // do this too after the tok::at is parsed above).  But be explicit.
@@ -1101,16 +1111,15 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
       FormatTok->BlockKind = BK_BracedInit;
       parseBracedList();
       break;
-    case tok::r_paren:
+    case tok::l_paren:
+      parseParens();
       // JavaScript can just have free standing methods and getters/setters in
       // object literals. Detect them by a "{" following ")".
       if (Style.Language == FormatStyle::LK_JavaScript) {
-        nextToken();
         if (FormatTok->is(tok::l_brace))
           parseChildBlock();
         break;
       }
-      nextToken();
       break;
     case tok::r_brace:
       nextToken();
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 8546763..dd664ca 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -1521,6 +1521,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
       Args.hasArg(OPT_fmodules_decluse) || Opts.ModulesStrictDeclUse;
   Opts.ModulesLocalVisibility =
       Args.hasArg(OPT_fmodules_local_submodule_visibility);
+  Opts.ModulesHideInternalLinkage =
+      !Args.hasArg(OPT_fno_modules_hide_internal_linkage);
   Opts.ModulesSearchAll = Opts.Modules &&
     !Args.hasArg(OPT_fno_modules_search_all) &&
     Args.hasArg(OPT_fmodules_search_all);
@@ -1586,6 +1588,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.ImplementationOfModule =
       Args.getLastArgValue(OPT_fmodule_implementation_of);
   Opts.ModuleFeatures = Args.getAllArgValues(OPT_fmodule_feature);
+  std::sort(Opts.ModuleFeatures.begin(), Opts.ModuleFeatures.end());
   Opts.NativeHalfType |= Args.hasArg(OPT_fnative_half_type);
   Opts.HalfArgsAndReturns = Args.hasArg(OPT_fallow_half_arguments_and_returns);
   Opts.GNUAsm = !Args.hasArg(OPT_fno_gnu_inline_asm);
@@ -1847,37 +1850,37 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
   std::unique_ptr<OptTable> Opts(createDriverOptTable());
   const unsigned IncludedFlagsBitmask = options::CC1Option;
   unsigned MissingArgIndex, MissingArgCount;
-  std::unique_ptr<InputArgList> Args(
-      Opts->ParseArgs(ArgBegin, ArgEnd, MissingArgIndex, MissingArgCount,
-                      IncludedFlagsBitmask));
+  InputArgList Args =
+      Opts->ParseArgs(llvm::makeArrayRef(ArgBegin, ArgEnd), MissingArgIndex,
+                      MissingArgCount, IncludedFlagsBitmask);
 
   // Check for missing argument error.
   if (MissingArgCount) {
     Diags.Report(diag::err_drv_missing_argument)
-      << Args->getArgString(MissingArgIndex) << MissingArgCount;
+        << Args.getArgString(MissingArgIndex) << MissingArgCount;
     Success = false;
   }
 
   // Issue errors on unknown arguments.
-  for (const Arg *A : Args->filtered(OPT_UNKNOWN)) {
-    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(*Args);
+  for (const Arg *A : Args.filtered(OPT_UNKNOWN)) {
+    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args);
     Success = false;
   }
 
-  Success &= ParseAnalyzerArgs(*Res.getAnalyzerOpts(), *Args, Diags);
-  Success &= ParseMigratorArgs(Res.getMigratorOpts(), *Args);
-  ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), *Args);
-  Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), *Args, &Diags);
-  ParseCommentArgs(Res.getLangOpts()->CommentOpts, *Args);
-  ParseFileSystemArgs(Res.getFileSystemOpts(), *Args);
+  Success &= ParseAnalyzerArgs(*Res.getAnalyzerOpts(), Args, Diags);
+  Success &= ParseMigratorArgs(Res.getMigratorOpts(), Args);
+  ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), Args);
+  Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags);
+  ParseCommentArgs(Res.getLangOpts()->CommentOpts, Args);
+  ParseFileSystemArgs(Res.getFileSystemOpts(), Args);
   // FIXME: We shouldn't have to pass the DashX option around here
-  InputKind DashX = ParseFrontendArgs(Res.getFrontendOpts(), *Args, Diags);
-  ParseTargetArgs(Res.getTargetOpts(), *Args);
-  Success &= ParseCodeGenArgs(Res.getCodeGenOpts(), *Args, DashX, Diags,
+  InputKind DashX = ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags);
+  ParseTargetArgs(Res.getTargetOpts(), Args);
+  Success &= ParseCodeGenArgs(Res.getCodeGenOpts(), Args, DashX, Diags,
                               Res.getTargetOpts());
-  ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), *Args);
+  ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args);
   if (DashX != IK_AST && DashX != IK_LLVM_IR) {
-    ParseLangArgs(*Res.getLangOpts(), *Args, DashX, Diags);
+    ParseLangArgs(*Res.getLangOpts(), Args, DashX, Diags);
     if (Res.getFrontendOpts().ProgramAction == frontend::RewriteObjC)
       Res.getLangOpts()->ObjCExceptions = 1;
   }
@@ -1886,8 +1889,8 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
   // ParsePreprocessorArgs and remove the FileManager 
   // parameters from the function and the "FileManager.h" #include.
   FileManager FileMgr(Res.getFileSystemOpts());
-  ParsePreprocessorArgs(Res.getPreprocessorOpts(), *Args, FileMgr, Diags);
-  ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args,
+  ParsePreprocessorArgs(Res.getPreprocessorOpts(), Args, FileMgr, Diags);
+  ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), Args,
                               Res.getFrontendOpts().ProgramAction);
   return Success;
 }
@@ -1965,6 +1968,9 @@ std::string CompilerInvocation::getModuleHash() const {
 #define BENIGN_LANGOPT(Name, Bits, Default, Description)
 #define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
 #include "clang/Basic/LangOptions.def"
+
+  for (StringRef Feature : LangOpts->ModuleFeatures)
+    code = hash_combine(code, Feature);
   
   // Extend the signature with the target options.
   code = hash_combine(code, TargetOpts->Triple, TargetOpts->CPU,
diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp
index 6f202a1..4997764 100644
--- a/lib/Frontend/FrontendActions.cpp
+++ b/lib/Frontend/FrontendActions.cpp
@@ -470,6 +470,13 @@ namespace {
 #define BENIGN_LANGOPT(Name, Bits, Default, Description)
 #define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
 #include "clang/Basic/LangOptions.def"
+
+      if (!LangOpts.ModuleFeatures.empty()) {
+        Out.indent(4) << "Module features:\n";
+        for (StringRef Feature : LangOpts.ModuleFeatures)
+          Out.indent(6) << Feature << "\n";
+      }
+
       return false;
     }
 
diff --git a/lib/Frontend/InitHeaderSearch.cpp b/lib/Frontend/InitHeaderSearch.cpp
index bf8470e..e3a17c9 100644
--- a/lib/Frontend/InitHeaderSearch.cpp
+++ b/lib/Frontend/InitHeaderSearch.cpp
@@ -82,11 +82,6 @@ public:
                                      StringRef Arch,
                                      StringRef Version);
 
-  /// AddMinGW64CXXPaths - Add the necessary paths to support
-  /// libstdc++ of x86_64-w64-mingw32 aka mingw-w64.
-  void AddMinGW64CXXPaths(StringRef Base,
-                          StringRef Version);
-
   // AddDefaultCIncludePaths - Add paths that should always be searched.
   void AddDefaultCIncludePaths(const llvm::Triple &triple,
                                const HeaderSearchOptions &HSOpts);
@@ -208,19 +203,6 @@ void InitHeaderSearch::AddMinGWCPlusPlusIncludePaths(StringRef Base,
           CXXSystem, false);
 }
 
-void InitHeaderSearch::AddMinGW64CXXPaths(StringRef Base,
-                                          StringRef Version) {
-  // Assumes Base is HeaderSearchOpts' ResourceDir
-  AddPath(Base + "/../../../include/c++/" + Version,
-          CXXSystem, false);
-  AddPath(Base + "/../../../include/c++/" + Version + "/x86_64-w64-mingw32",
-          CXXSystem, false);
-  AddPath(Base + "/../../../include/c++/" + Version + "/i686-w64-mingw32",
-          CXXSystem, false);
-  AddPath(Base + "/../../../include/c++/" + Version + "/backward",
-          CXXSystem, false);
-}
-
 void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
                                             const HeaderSearchOptions &HSOpts) {
   llvm::Triple::OSType os = triple.getOS();
@@ -234,6 +216,9 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
     case llvm::Triple::Bitrig:
     case llvm::Triple::NaCl:
       break;
+    case llvm::Triple::Win32:
+      if (triple.getEnvironment() != llvm::Triple::Cygnus)
+        break;
     default:
       // FIXME: temporary hack: hard-coded paths.
       AddPath("/usr/local/include", System, false);
@@ -323,26 +308,6 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
       AddPath("/usr/include/w32api", System, false);
       break;
     case llvm::Triple::GNU:
-      // mingw-w64 crt include paths
-      // <sysroot>/i686-w64-mingw32/include
-      SmallString<128> P = StringRef(HSOpts.ResourceDir);
-      llvm::sys::path::append(P, "../../../i686-w64-mingw32/include");
-      AddPath(P, System, false);
-
-      // <sysroot>/x86_64-w64-mingw32/include
-      P.resize(HSOpts.ResourceDir.size());
-      llvm::sys::path::append(P, "../../../x86_64-w64-mingw32/include");
-      AddPath(P, System, false);
-
-      // mingw.org crt include paths
-      // <sysroot>/include
-      P.resize(HSOpts.ResourceDir.size());
-      llvm::sys::path::append(P, "../../../include");
-      AddPath(P, System, false);
-      AddPath("/mingw/include", System, false);
-#if defined(LLVM_ON_WIN32)
-      AddPath("c:/mingw/include", System, false); 
-#endif
       break;
     }
     break;
@@ -419,27 +384,8 @@ AddDefaultCPlusPlusIncludePaths(const llvm::Triple &triple, const HeaderSearchOp
       // g++-4 / Cygwin-1.5
       AddMinGWCPlusPlusIncludePaths("/usr/lib/gcc", "i686-pc-cygwin", "4.3.2");
       break;
-    case llvm::Triple::GNU:
-      // mingw-w64 C++ include paths (i686-w64-mingw32 and x86_64-w64-mingw32)
-      AddMinGW64CXXPaths(HSOpts.ResourceDir, "4.7.0");
-      AddMinGW64CXXPaths(HSOpts.ResourceDir, "4.7.1");
-      AddMinGW64CXXPaths(HSOpts.ResourceDir, "4.7.2");
-      AddMinGW64CXXPaths(HSOpts.ResourceDir, "4.7.3");
-      AddMinGW64CXXPaths(HSOpts.ResourceDir, "4.8.0");
-      AddMinGW64CXXPaths(HSOpts.ResourceDir, "4.8.1");
-      AddMinGW64CXXPaths(HSOpts.ResourceDir, "4.8.2");
-      // mingw.org C++ include paths
-#if defined(LLVM_ON_WIN32)
-      AddMinGWCPlusPlusIncludePaths("c:/MinGW/lib/gcc", "mingw32", "4.7.0");
-      AddMinGWCPlusPlusIncludePaths("c:/MinGW/lib/gcc", "mingw32", "4.7.1");
-      AddMinGWCPlusPlusIncludePaths("c:/MinGW/lib/gcc", "mingw32", "4.7.2");
-      AddMinGWCPlusPlusIncludePaths("c:/MinGW/lib/gcc", "mingw32", "4.7.3");
-      AddMinGWCPlusPlusIncludePaths("c:/MinGW/lib/gcc", "mingw32", "4.8.0");
-      AddMinGWCPlusPlusIncludePaths("c:/MinGW/lib/gcc", "mingw32", "4.8.1");
-      AddMinGWCPlusPlusIncludePaths("c:/MinGW/lib/gcc", "mingw32", "4.8.2");
-#endif
-      break;
     }
+    break;
   case llvm::Triple::DragonFly:
     if (llvm::sys::fs::exists("/usr/lib/gcc47"))
       AddPath("/usr/include/c++/4.7", CXXSystem, false);
@@ -482,8 +428,7 @@ void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang,
     return;
 
   case llvm::Triple::Win32:
-    if (triple.getEnvironment() == llvm::Triple::MSVC ||
-        triple.getEnvironment() == llvm::Triple::Itanium ||
+    if (triple.getEnvironment() != llvm::Triple::Cygnus ||
         triple.isOSBinFormatMachO())
       return;
     break;
diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp
index 2417146..025c8b9 100644
--- a/lib/Frontend/InitPreprocessor.cpp
+++ b/lib/Frontend/InitPreprocessor.cpp
@@ -868,6 +868,14 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
                         "__attribute__((objc_ownership(none)))");
   }
 
+  // On Darwin, there are __double_underscored variants of the type
+  // nullability qualifiers.
+  if (TI.getTriple().isOSDarwin()) {
+    Builder.defineMacro("__nonnull", "_Nonnull");
+    Builder.defineMacro("__null_unspecified", "_Null_unspecified");
+    Builder.defineMacro("__nullable", "_Nullable");
+  }
+
   // OpenMP definition
   if (LangOpts.OpenMP) {
     // OpenMP 2.2:
diff --git a/lib/Frontend/MultiplexConsumer.cpp b/lib/Frontend/MultiplexConsumer.cpp
index a53f4d2..91ee100 100644
--- a/lib/Frontend/MultiplexConsumer.cpp
+++ b/lib/Frontend/MultiplexConsumer.cpp
@@ -128,6 +128,8 @@ public:
   void DeclarationMarkedUsed(const Decl *D) override;
   void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override;
   void RedefinedHiddenDefinition(const NamedDecl *D, Module *M) override;
+  void AddedAttributeToRecord(const Attr *Attr, 
+                              const RecordDecl *Record) override;
 
 private:
   std::vector<ASTMutationListener*> Listeners;
@@ -226,6 +228,13 @@ void MultiplexASTMutationListener::RedefinedHiddenDefinition(const NamedDecl *D,
   for (auto *L : Listeners)
     L->RedefinedHiddenDefinition(D, M);
 }
+  
+void MultiplexASTMutationListener::AddedAttributeToRecord(
+                                                    const Attr *Attr, 
+                                                    const RecordDecl *Record) {
+  for (auto *L : Listeners)
+    L->AddedAttributeToRecord(Attr, Record);
+}
 
 }  // end namespace clang
 
diff --git a/lib/Frontend/Rewrite/InclusionRewriter.cpp b/lib/Frontend/Rewrite/InclusionRewriter.cpp
index b9ea051..08d6cf1 100644
--- a/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -29,13 +29,11 @@ namespace {
 class InclusionRewriter : public PPCallbacks {
   /// Information about which #includes were actually performed,
   /// created by preprocessor callbacks.
-  struct FileChange {
-    const Module *Mod;
-    SourceLocation From;
+  struct IncludedFile {
     FileID Id;
     SrcMgr::CharacteristicKind FileType;
-    FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
-    }
+    IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType)
+        : Id(Id), FileType(FileType) {}
   };
   Preprocessor &PP; ///< Used to find inclusion directives.
   SourceManager &SM; ///< Used to read and manage source files.
@@ -44,11 +42,13 @@ class InclusionRewriter : public PPCallbacks {
   const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
   bool ShowLineMarkers; ///< Show #line markers.
   bool UseLineDirectives; ///< Use of line directives or line markers.
-  typedef std::map<unsigned, FileChange> FileChangeMap;
-  FileChangeMap FileChanges; ///< Tracks which files were included where.
-  /// Used transitively for building up the FileChanges mapping over the
+  /// Tracks where inclusions that change the file are found.
+  std::map<unsigned, IncludedFile> FileIncludes;
+  /// Tracks where inclusions that import modules are found.
+  std::map<unsigned, const Module *> ModuleIncludes;
+  /// Used transitively for building up the FileIncludes mapping over the
   /// various \c PPCallbacks callbacks.
-  FileChangeMap::iterator LastInsertedFileChange;
+  SourceLocation LastInclusionLocation;
 public:
   InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,
                     bool UseLineDirectives);
@@ -82,7 +82,8 @@ private:
   bool HandleHasInclude(FileID FileId, Lexer &RawLex,
                         const DirectoryLookup *Lookup, Token &Tok,
                         bool &FileExists);
-  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
+  const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const;
+  const Module *FindModuleAtLocation(SourceLocation Loc) const;
   StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
 };
 
@@ -95,7 +96,7 @@ InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
     : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
       PredefinesBuffer(nullptr), ShowLineMarkers(ShowLineMarkers),
       UseLineDirectives(UseLineDirectives),
-      LastInsertedFileChange(FileChanges.end()) {}
+      LastInclusionLocation(SourceLocation()) {}
 
 /// Write appropriate line information as either #line directives or GNU line
 /// markers depending on what mode we're in, including the \p Filename and
@@ -143,12 +144,15 @@ void InclusionRewriter::FileChanged(SourceLocation Loc,
                                     FileID) {
   if (Reason != EnterFile)
     return;
-  if (LastInsertedFileChange == FileChanges.end())
+  if (LastInclusionLocation.isInvalid())
     // we didn't reach this file (eg: the main file) via an inclusion directive
     return;
-  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
-  LastInsertedFileChange->second.FileType = NewFileType;
-  LastInsertedFileChange = FileChanges.end();
+  FileID Id = FullSourceLoc(Loc, SM).getFileID();
+  auto P = FileIncludes.insert(std::make_pair(
+      LastInclusionLocation.getRawEncoding(), IncludedFile(Id, NewFileType)));
+  (void)P;
+  assert(P.second && "Unexpected revisitation of the same include directive");
+  LastInclusionLocation = SourceLocation();
 }
 
 /// Called whenever an inclusion is skipped due to canonical header protection
@@ -156,10 +160,9 @@ void InclusionRewriter::FileChanged(SourceLocation Loc,
 void InclusionRewriter::FileSkipped(const FileEntry &/*SkippedFile*/,
                                     const Token &/*FilenameTok*/,
                                     SrcMgr::CharacteristicKind /*FileType*/) {
-  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
-    "found via an inclusion directive, was skipped");
-  FileChanges.erase(LastInsertedFileChange);
-  LastInsertedFileChange = FileChanges.end();
+  assert(!LastInclusionLocation.isInvalid() &&
+         "A file, that wasn't found via an inclusion directive, was skipped");
+  LastInclusionLocation = SourceLocation();
 }
 
 /// This should be called whenever the preprocessor encounters include
@@ -176,25 +179,38 @@ void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
                                            StringRef /*SearchPath*/,
                                            StringRef /*RelativePath*/,
                                            const Module *Imported) {
-  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
-    "directive was found before the previous one was processed");
-  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
-    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
-  assert(p.second && "Unexpected revisitation of the same include directive");
-  if (!Imported)
-    LastInsertedFileChange = p.first;
+  assert(LastInclusionLocation.isInvalid() &&
+         "Another inclusion directive was found before the previous one "
+         "was processed");
+  if (Imported) {
+    auto P = ModuleIncludes.insert(
+        std::make_pair(HashLoc.getRawEncoding(), Imported));
+    (void)P;
+    assert(P.second && "Unexpected revisitation of the same include directive");
+  } else
+    LastInclusionLocation = HashLoc;
 }
 
 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
 /// an inclusion directive) in the map of inclusion information, FileChanges.
-const InclusionRewriter::FileChange *
-InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
-  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
-  if (I != FileChanges.end())
+const InclusionRewriter::IncludedFile *
+InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const {
+  const auto I = FileIncludes.find(Loc.getRawEncoding());
+  if (I != FileIncludes.end())
     return &I->second;
   return nullptr;
 }
 
+/// Simple lookup for a SourceLocation (specifically one denoting the hash in
+/// an inclusion directive) in the map of module inclusion information.
+const Module *
+InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const {
+  const auto I = ModuleIncludes.find(Loc.getRawEncoding());
+  if (I != ModuleIncludes.end())
+    return I->second;
+  return nullptr;
+}
+
 /// Detect the likely line ending style of \p FromFile by examining the first
 /// newline found within it.
 static StringRef DetectEOL(const MemoryBuffer &FromFile) {
@@ -388,8 +404,7 @@ bool InclusionRewriter::Process(FileID FileId,
 {
   bool Invalid;
   const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
-  if (Invalid) // invalid inclusion
-    return false;
+  assert(!Invalid && "Attempting to process invalid inclusion");
   const char *FileName = FromFile.getBufferIdentifier();
   Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
   RawLex.SetCommentRetentionState(false);
@@ -433,13 +448,12 @@ bool InclusionRewriter::Process(FileID FileId,
             if (FileId != PP.getPredefinesFileID())
               WriteLineInfo(FileName, Line - 1, FileType, "");
             StringRef LineInfoExtra;
-            if (const FileChange *Change = FindFileChangeLocation(
-                HashToken.getLocation())) {
-              if (Change->Mod) {
-                WriteImplicitModuleImport(Change->Mod);
-
-              // else now include and recursively process the file
-              } else if (Process(Change->Id, Change->FileType)) {
+            SourceLocation Loc = HashToken.getLocation();
+            if (const Module *Mod = FindModuleAtLocation(Loc))
+              WriteImplicitModuleImport(Mod);
+            else if (const IncludedFile *Inc = FindIncludeAtLocation(Loc)) {
+              // include and recursively process the file
+              if (Process(Inc->Id, Inc->FileType)) {
                 // and set lineinfo back to this file, if the nested one was
                 // actually included
                 // `2' indicates returning to a file (after having included
diff --git a/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index e13cdb3..2902ba7 100644
--- a/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -1932,9 +1932,9 @@ Stmt *RewriteModernObjC::RewriteObjCSynchronizedStmt(ObjCAtSynchronizedStmt *S)
 void RewriteModernObjC::WarnAboutReturnGotoStmts(Stmt *S)
 {
   // Perform a bottom up traversal of all children.
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI)
-      WarnAboutReturnGotoStmts(*CI);
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt)
+      WarnAboutReturnGotoStmts(SubStmt);
 
   if (isa<ReturnStmt>(S) || isa<GotoStmt>(S)) {
     Diags.Report(Context->getFullLoc(S->getLocStart()),
@@ -4549,12 +4549,12 @@ void RewriteModernObjC::InsertBlockLiteralsWithinMethod(ObjCMethodDecl *MD) {
 }
 
 void RewriteModernObjC::GetBlockDeclRefExprs(Stmt *S) {
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI) {
-      if (BlockExpr *CBE = dyn_cast<BlockExpr>(*CI))
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt) {
+      if (BlockExpr *CBE = dyn_cast<BlockExpr>(SubStmt))
         GetBlockDeclRefExprs(CBE->getBody());
       else
-        GetBlockDeclRefExprs(*CI);
+        GetBlockDeclRefExprs(SubStmt);
     }
   // Handle specific things.
   if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(S))
@@ -4569,19 +4569,16 @@ void RewriteModernObjC::GetBlockDeclRefExprs(Stmt *S) {
 void RewriteModernObjC::GetInnerBlockDeclRefExprs(Stmt *S,
                 SmallVectorImpl<DeclRefExpr *> &InnerBlockDeclRefs,
                 llvm::SmallPtrSetImpl<const DeclContext *> &InnerContexts) {
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI) {
-      if (BlockExpr *CBE = dyn_cast<BlockExpr>(*CI)) {
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt) {
+      if (BlockExpr *CBE = dyn_cast<BlockExpr>(SubStmt)) {
         InnerContexts.insert(cast<DeclContext>(CBE->getBlockDecl()));
         GetInnerBlockDeclRefExprs(CBE->getBody(),
                                   InnerBlockDeclRefs,
                                   InnerContexts);
       }
       else
-        GetInnerBlockDeclRefExprs(*CI,
-                                  InnerBlockDeclRefs,
-                                  InnerContexts);
-
+        GetInnerBlockDeclRefExprs(SubStmt, InnerBlockDeclRefs, InnerContexts);
     }
   // Handle specific things.
   if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(S)) {
@@ -5564,12 +5561,11 @@ Stmt *RewriteModernObjC::RewriteFunctionBodyOrGlobalInitializer(Stmt *S) {
   SourceRange OrigStmtRange = S->getSourceRange();
 
   // Perform a bottom up rewrite of all children.
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI) {
-      Stmt *childStmt = (*CI);
+  for (Stmt *&childStmt : S->children())
+    if (childStmt) {
       Stmt *newStmt = RewriteFunctionBodyOrGlobalInitializer(childStmt);
       if (newStmt) {
-        *CI = newStmt;
+        childStmt = newStmt;
       }
     }
 
diff --git a/lib/Frontend/Rewrite/RewriteObjC.cpp b/lib/Frontend/Rewrite/RewriteObjC.cpp
index b2a45b4..204820b 100644
--- a/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -1712,9 +1712,9 @@ Stmt *RewriteObjC::RewriteObjCSynchronizedStmt(ObjCAtSynchronizedStmt *S) {
 void RewriteObjC::WarnAboutReturnGotoStmts(Stmt *S)
 {
   // Perform a bottom up traversal of all children.
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI)
-      WarnAboutReturnGotoStmts(*CI);
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt)
+      WarnAboutReturnGotoStmts(SubStmt);
 
   if (isa<ReturnStmt>(S) || isa<GotoStmt>(S)) {
     Diags.Report(Context->getFullLoc(S->getLocStart()),
@@ -1726,9 +1726,9 @@ void RewriteObjC::WarnAboutReturnGotoStmts(Stmt *S)
 void RewriteObjC::HasReturnStmts(Stmt *S, bool &hasReturns) 
 {  
   // Perform a bottom up traversal of all children.
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-   if (*CI)
-     HasReturnStmts(*CI, hasReturns);
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt)
+      HasReturnStmts(SubStmt, hasReturns);
 
  if (isa<ReturnStmt>(S))
    hasReturns = true;
@@ -1737,9 +1737,9 @@ void RewriteObjC::HasReturnStmts(Stmt *S, bool &hasReturns)
 
 void RewriteObjC::RewriteTryReturnStmts(Stmt *S) {
  // Perform a bottom up traversal of all children.
- for (Stmt::child_range CI = S->children(); CI; ++CI)
-   if (*CI) {
-     RewriteTryReturnStmts(*CI);
+ for (Stmt *SubStmt : S->children())
+   if (SubStmt) {
+     RewriteTryReturnStmts(SubStmt);
    }
  if (isa<ReturnStmt>(S)) {
    SourceLocation startLoc = S->getLocStart();
@@ -1760,9 +1760,9 @@ void RewriteObjC::RewriteTryReturnStmts(Stmt *S) {
 
 void RewriteObjC::RewriteSyncReturnStmts(Stmt *S, std::string syncExitBuf) {
   // Perform a bottom up traversal of all children.
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI) {
-      RewriteSyncReturnStmts(*CI, syncExitBuf);
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt) {
+      RewriteSyncReturnStmts(SubStmt, syncExitBuf);
     }
   if (isa<ReturnStmt>(S)) {
     SourceLocation startLoc = S->getLocStart();
@@ -3663,12 +3663,12 @@ void RewriteObjC::InsertBlockLiteralsWithinMethod(ObjCMethodDecl *MD) {
 }
 
 void RewriteObjC::GetBlockDeclRefExprs(Stmt *S) {
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI) {
-      if (BlockExpr *CBE = dyn_cast<BlockExpr>(*CI))
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt) {
+      if (BlockExpr *CBE = dyn_cast<BlockExpr>(SubStmt))
         GetBlockDeclRefExprs(CBE->getBody());
       else
-        GetBlockDeclRefExprs(*CI);
+        GetBlockDeclRefExprs(SubStmt);
     }
   // Handle specific things.
   if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(S))
@@ -3683,19 +3683,16 @@ void RewriteObjC::GetBlockDeclRefExprs(Stmt *S) {
 void RewriteObjC::GetInnerBlockDeclRefExprs(Stmt *S,
                 SmallVectorImpl<DeclRefExpr *> &InnerBlockDeclRefs,
                 llvm::SmallPtrSetImpl<const DeclContext *> &InnerContexts) {
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI) {
-      if (BlockExpr *CBE = dyn_cast<BlockExpr>(*CI)) {
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt) {
+      if (BlockExpr *CBE = dyn_cast<BlockExpr>(SubStmt)) {
         InnerContexts.insert(cast<DeclContext>(CBE->getBlockDecl()));
         GetInnerBlockDeclRefExprs(CBE->getBody(),
                                   InnerBlockDeclRefs,
                                   InnerContexts);
       }
       else
-        GetInnerBlockDeclRefExprs(*CI,
-                                  InnerBlockDeclRefs,
-                                  InnerContexts);
-
+        GetInnerBlockDeclRefExprs(SubStmt, InnerBlockDeclRefs, InnerContexts);
     }
   // Handle specific things.
   if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(S)) {
@@ -4611,12 +4608,11 @@ Stmt *RewriteObjC::RewriteFunctionBodyOrGlobalInitializer(Stmt *S) {
   SourceRange OrigStmtRange = S->getSourceRange();
 
   // Perform a bottom up rewrite of all children.
-  for (Stmt::child_range CI = S->children(); CI; ++CI)
-    if (*CI) {
-      Stmt *childStmt = (*CI);
+  for (Stmt *&childStmt : S->children())
+    if (childStmt) {
       Stmt *newStmt = RewriteFunctionBodyOrGlobalInitializer(childStmt);
       if (newStmt) {
-        *CI = newStmt;
+        childStmt = newStmt;
       }
     }
 
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index 5f8857c..87afc60 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -5,6 +5,7 @@ set(files
   arm_acle.h
   avx2intrin.h
   avx512bwintrin.h
+  avx512cdintrin.h
   avx512erintrin.h
   avx512fintrin.h
   avx512vlbwintrin.h
@@ -21,11 +22,13 @@ set(files
   float.h
   fma4intrin.h
   fmaintrin.h
+  fxsrintrin.h
   htmintrin.h
   htmxlintrin.h
   ia32intrin.h
   immintrin.h
   Intrin.h
+  inttypes.h
   iso646.h
   limits.h
   lzcntintrin.h
diff --git a/lib/Headers/Intrin.h b/lib/Headers/Intrin.h
index dd04e06..7ba311e 100644
--- a/lib/Headers/Intrin.h
+++ b/lib/Headers/Intrin.h
@@ -40,7 +40,7 @@
 #endif
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
 
 #ifdef __cplusplus
 extern "C" {
@@ -180,8 +180,6 @@ unsigned long __cdecl _byteswap_ulong(unsigned long);
 unsigned short __cdecl _byteswap_ushort(unsigned short);
 void __cdecl _disable(void);
 void __cdecl _enable(void);
-void __cdecl _fxrstor(void const *);
-void __cdecl _fxsave(void *);
 long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);
 static __inline__
 long _InterlockedAnd(long volatile *_Value, long _Mask);
@@ -358,8 +356,6 @@ unsigned char _bittestandreset64(__int64 *, __int64);
 static __inline__
 unsigned char _bittestandset64(__int64 *, __int64);
 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
-void __cdecl _fxrstor64(void const *);
-void __cdecl _fxsave64(void *);
 long _InterlockedAnd_np(long volatile *_Value, long _Mask);
 short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
 __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
@@ -424,7 +420,7 @@ unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);
  * Multiply two 64-bit integers and obtain a 64-bit result.
  * The low-half is returned directly and the high half is in an out parameter.
  */
-static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
+static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
 _umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand,
          unsigned __int64 *_HighProduct) {
   unsigned __int128 _FullProduct =
@@ -432,7 +428,7 @@ _umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand,
   *_HighProduct = _FullProduct >> 64;
   return _FullProduct;
 }
-static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
+static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
 __umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) {
   unsigned __int128 _FullProduct =
       (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand;
@@ -447,54 +443,54 @@ void __cdecl _xsaveopt64(void *, unsigned __int64);
 /*----------------------------------------------------------------------------*\
 |* Bit Twiddling
 \*----------------------------------------------------------------------------*/
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _rotl8(unsigned char _Value, unsigned char _Shift) {
   _Shift &= 0x7;
   return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _rotr8(unsigned char _Value, unsigned char _Shift) {
   _Shift &= 0x7;
   return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value;
 }
-static __inline__ unsigned short DEFAULT_FN_ATTRS
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
 _rotl16(unsigned short _Value, unsigned char _Shift) {
   _Shift &= 0xf;
   return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value;
 }
-static __inline__ unsigned short DEFAULT_FN_ATTRS
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
 _rotr16(unsigned short _Value, unsigned char _Shift) {
   _Shift &= 0xf;
   return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value;
 }
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _rotl(unsigned int _Value, int _Shift) {
   _Shift &= 0x1f;
   return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
 }
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _rotr(unsigned int _Value, int _Shift) {
   _Shift &= 0x1f;
   return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
 }
-static __inline__ unsigned long DEFAULT_FN_ATTRS
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
 _lrotl(unsigned long _Value, int _Shift) {
   _Shift &= 0x1f;
   return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
 }
-static __inline__ unsigned long DEFAULT_FN_ATTRS
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
 _lrotr(unsigned long _Value, int _Shift) {
   _Shift &= 0x1f;
   return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
 }
 static
-__inline__ unsigned __int64 DEFAULT_FN_ATTRS
+__inline__ unsigned __int64 __DEFAULT_FN_ATTRS
 _rotl64(unsigned __int64 _Value, int _Shift) {
   _Shift &= 0x3f;
   return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value;
 }
 static
-__inline__ unsigned __int64 DEFAULT_FN_ATTRS
+__inline__ unsigned __int64 __DEFAULT_FN_ATTRS
 _rotr64(unsigned __int64 _Value, int _Shift) {
   _Shift &= 0x3f;
   return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value;
@@ -502,52 +498,52 @@ _rotr64(unsigned __int64 _Value, int _Shift) {
 /*----------------------------------------------------------------------------*\
 |* Bit Counting and Testing
 \*----------------------------------------------------------------------------*/
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _BitScanForward(unsigned long *_Index, unsigned long _Mask) {
   if (!_Mask)
     return 0;
   *_Index = __builtin_ctzl(_Mask);
   return 1;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
   if (!_Mask)
     return 0;
   *_Index = 31 - __builtin_clzl(_Mask);
   return 1;
 }
-static __inline__ unsigned short DEFAULT_FN_ATTRS
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
 __popcnt16(unsigned short value) {
   return __builtin_popcount((int)value);
 }
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __popcnt(unsigned int value) {
   return __builtin_popcount(value);
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittest(long const *a, long b) {
   return (*a >> b) & 1;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittestandcomplement(long *a, long b) {
   unsigned char x = (*a >> b) & 1;
   *a = *a ^ (1 << b);
   return x;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittestandreset(long *a, long b) {
   unsigned char x = (*a >> b) & 1;
   *a = *a & ~(1 << b);
   return x;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittestandset(long *a, long b) {
   unsigned char x = (*a >> b) & 1;
   *a = *a | (1 << b);
   return x;
 }
 #if defined(__i386__) || defined(__x86_64__)
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _interlockedbittestandset(long volatile *__BitBase, long __BitPos) {
   unsigned char __Res;
   __asm__ ("xor %0, %0\n"
@@ -559,14 +555,14 @@ _interlockedbittestandset(long volatile *__BitBase, long __BitPos) {
 }
 #endif
 #ifdef __x86_64__
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
   if (!_Mask)
     return 0;
   *_Index = __builtin_ctzll(_Mask);
   return 1;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
   if (!_Mask)
     return 0;
@@ -574,33 +570,33 @@ _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
   return 1;
 }
 static __inline__
-unsigned __int64 DEFAULT_FN_ATTRS
+unsigned __int64 __DEFAULT_FN_ATTRS
  __popcnt64(unsigned __int64 value) {
   return __builtin_popcountll(value);
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittest64(__int64 const *a, __int64 b) {
   return (*a >> b) & 1;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittestandcomplement64(__int64 *a, __int64 b) {
   unsigned char x = (*a >> b) & 1;
   *a = *a ^ (1ll << b);
   return x;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittestandreset64(__int64 *a, __int64 b) {
   unsigned char x = (*a >> b) & 1;
   *a = *a & ~(1ll << b);
   return x;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _bittestandset64(__int64 *a, __int64 b) {
   unsigned char x = (*a >> b) & 1;
   *a = *a | (1ll << b);
   return x;
 }
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) {
   unsigned char __Res;
   __asm__ ("xor %0, %0\n"
@@ -614,16 +610,16 @@ _interlockedbittestandset64(__int64 volatile *__BitBase, __int64 __BitPos) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Exchange Add
 \*----------------------------------------------------------------------------*/
-static __inline__ char DEFAULT_FN_ATTRS
+static __inline__ char __DEFAULT_FN_ATTRS
 _InterlockedExchangeAdd8(char volatile *_Addend, char _Value) {
   return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
 }
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
   return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
   return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
 }
@@ -631,20 +627,20 @@ _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Exchange Sub
 \*----------------------------------------------------------------------------*/
-static __inline__ char DEFAULT_FN_ATTRS
+static __inline__ char __DEFAULT_FN_ATTRS
 _InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
   return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
 }
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
   return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
 }
-static __inline__ long DEFAULT_FN_ATTRS
+static __inline__ long __DEFAULT_FN_ATTRS
 _InterlockedExchangeSub(long volatile *_Subend, long _Value) {
   return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
   return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
 }
@@ -652,12 +648,12 @@ _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Increment
 \*----------------------------------------------------------------------------*/
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedIncrement16(short volatile *_Value) {
   return __atomic_add_fetch(_Value, 1, 0);
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedIncrement64(__int64 volatile *_Value) {
   return __atomic_add_fetch(_Value, 1, 0);
 }
@@ -665,12 +661,12 @@ _InterlockedIncrement64(__int64 volatile *_Value) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Decrement
 \*----------------------------------------------------------------------------*/
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedDecrement16(short volatile *_Value) {
   return __atomic_sub_fetch(_Value, 1, 0);
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedDecrement64(__int64 volatile *_Value) {
   return __atomic_sub_fetch(_Value, 1, 0);
 }
@@ -678,20 +674,20 @@ _InterlockedDecrement64(__int64 volatile *_Value) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked And
 \*----------------------------------------------------------------------------*/
-static __inline__ char DEFAULT_FN_ATTRS
+static __inline__ char __DEFAULT_FN_ATTRS
 _InterlockedAnd8(char volatile *_Value, char _Mask) {
   return __atomic_and_fetch(_Value, _Mask, 0);
 }
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedAnd16(short volatile *_Value, short _Mask) {
   return __atomic_and_fetch(_Value, _Mask, 0);
 }
-static __inline__ long DEFAULT_FN_ATTRS
+static __inline__ long __DEFAULT_FN_ATTRS
 _InterlockedAnd(long volatile *_Value, long _Mask) {
   return __atomic_and_fetch(_Value, _Mask, 0);
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
   return __atomic_and_fetch(_Value, _Mask, 0);
 }
@@ -699,20 +695,20 @@ _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Or
 \*----------------------------------------------------------------------------*/
-static __inline__ char DEFAULT_FN_ATTRS
+static __inline__ char __DEFAULT_FN_ATTRS
 _InterlockedOr8(char volatile *_Value, char _Mask) {
   return __atomic_or_fetch(_Value, _Mask, 0);
 }
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedOr16(short volatile *_Value, short _Mask) {
   return __atomic_or_fetch(_Value, _Mask, 0);
 }
-static __inline__ long DEFAULT_FN_ATTRS
+static __inline__ long __DEFAULT_FN_ATTRS
 _InterlockedOr(long volatile *_Value, long _Mask) {
   return __atomic_or_fetch(_Value, _Mask, 0);
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
   return __atomic_or_fetch(_Value, _Mask, 0);
 }
@@ -720,20 +716,20 @@ _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Xor
 \*----------------------------------------------------------------------------*/
-static __inline__ char DEFAULT_FN_ATTRS
+static __inline__ char __DEFAULT_FN_ATTRS
 _InterlockedXor8(char volatile *_Value, char _Mask) {
   return __atomic_xor_fetch(_Value, _Mask, 0);
 }
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedXor16(short volatile *_Value, short _Mask) {
   return __atomic_xor_fetch(_Value, _Mask, 0);
 }
-static __inline__ long DEFAULT_FN_ATTRS
+static __inline__ long __DEFAULT_FN_ATTRS
 _InterlockedXor(long volatile *_Value, long _Mask) {
   return __atomic_xor_fetch(_Value, _Mask, 0);
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
   return __atomic_xor_fetch(_Value, _Mask, 0);
 }
@@ -741,18 +737,18 @@ _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Exchange
 \*----------------------------------------------------------------------------*/
-static __inline__ char DEFAULT_FN_ATTRS
+static __inline__ char __DEFAULT_FN_ATTRS
 _InterlockedExchange8(char volatile *_Target, char _Value) {
   __atomic_exchange(_Target, &_Value, &_Value, 0);
   return _Value;
 }
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedExchange16(short volatile *_Target, short _Value) {
   __atomic_exchange(_Target, &_Value, &_Value, 0);
   return _Value;
 }
 #ifdef __x86_64__
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
   __atomic_exchange(_Target, &_Value, &_Value, 0);
   return _Value;
@@ -761,19 +757,19 @@ _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
 /*----------------------------------------------------------------------------*\
 |* Interlocked Compare Exchange
 \*----------------------------------------------------------------------------*/
-static __inline__ char DEFAULT_FN_ATTRS
+static __inline__ char __DEFAULT_FN_ATTRS
 _InterlockedCompareExchange8(char volatile *_Destination,
                              char _Exchange, char _Comparand) {
   __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
   return _Comparand;
 }
-static __inline__ short DEFAULT_FN_ATTRS
+static __inline__ short __DEFAULT_FN_ATTRS
 _InterlockedCompareExchange16(short volatile *_Destination,
                               short _Exchange, short _Comparand) {
   __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
   return _Comparand;
 }
-static __inline__ __int64 DEFAULT_FN_ATTRS
+static __inline__ __int64 __DEFAULT_FN_ATTRS
 _InterlockedCompareExchange64(__int64 volatile *_Destination,
                               __int64 _Exchange, __int64 _Comparand) {
   __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
@@ -783,24 +779,24 @@ _InterlockedCompareExchange64(__int64 volatile *_Destination,
 |* Barriers
 \*----------------------------------------------------------------------------*/
 #if defined(__i386__) || defined(__x86_64__)
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
 _ReadWriteBarrier(void) {
   __asm__ volatile ("" : : : "memory");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
 _ReadBarrier(void) {
   __asm__ volatile ("" : : : "memory");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
 _WriteBarrier(void) {
   __asm__ volatile ("" : : : "memory");
 }
 #endif
 #ifdef __x86_64__
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __faststorefence(void) {
   __asm__ volatile("lock orq $0, (%%rsp)" : : : "memory");
 }
@@ -815,33 +811,33 @@ __faststorefence(void) {
     (__offset))
 
 #ifdef __i386__
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 __readfsbyte(unsigned long __offset) {
   return *__ptr_to_addr_space(257, unsigned char, __offset);
 }
-static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
+static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
 __readfsqword(unsigned long __offset) {
   return *__ptr_to_addr_space(257, unsigned __int64, __offset);
 }
-static __inline__ unsigned short DEFAULT_FN_ATTRS
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
 __readfsword(unsigned long __offset) {
   return *__ptr_to_addr_space(257, unsigned short, __offset);
 }
 #endif
 #ifdef __x86_64__
-static __inline__ unsigned char DEFAULT_FN_ATTRS
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 __readgsbyte(unsigned long __offset) {
   return *__ptr_to_addr_space(256, unsigned char, __offset);
 }
-static __inline__ unsigned long DEFAULT_FN_ATTRS
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
 __readgsdword(unsigned long __offset) {
   return *__ptr_to_addr_space(256, unsigned long, __offset);
 }
-static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
+static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
 __readgsqword(unsigned long __offset) {
   return *__ptr_to_addr_space(256, unsigned __int64, __offset);
 }
-static __inline__ unsigned short DEFAULT_FN_ATTRS
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
 __readgsword(unsigned long __offset) {
   return *__ptr_to_addr_space(256, unsigned short, __offset);
 }
@@ -851,44 +847,44 @@ __readgsword(unsigned long __offset) {
 |* movs, stos
 \*----------------------------------------------------------------------------*/
 #if defined(__i386__) || defined(__x86_64__)
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {
   __asm__("rep movsb" : : "D"(__dst), "S"(__src), "c"(__n)
                         : "%edi", "%esi", "%ecx");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {
   __asm__("rep movsl" : : "D"(__dst), "S"(__src), "c"(__n)
                         : "%edi", "%esi", "%ecx");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {
   __asm__("rep movsh" : : "D"(__dst), "S"(__src), "c"(__n)
                         : "%edi", "%esi", "%ecx");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __stosb(unsigned char *__dst, unsigned char __x, size_t __n) {
   __asm__("rep stosb" : : "D"(__dst), "a"(__x), "c"(__n)
                         : "%edi", "%ecx");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __stosd(unsigned long *__dst, unsigned long __x, size_t __n) {
   __asm__("rep stosl" : : "D"(__dst), "a"(__x), "c"(__n)
                         : "%edi", "%ecx");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __stosw(unsigned short *__dst, unsigned short __x, size_t __n) {
   __asm__("rep stosh" : : "D"(__dst), "a"(__x), "c"(__n)
                         : "%edi", "%ecx");
 }
 #endif
 #ifdef __x86_64__
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {
   __asm__("rep movsq" : : "D"(__dst), "S"(__src), "c"(__n)
                         : "%edi", "%esi", "%ecx");
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
   __asm__("rep stosq" : : "D"(__dst), "a"(__x), "c"(__n)
                         : "%edi", "%ecx");
@@ -898,32 +894,32 @@ __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {
 /*----------------------------------------------------------------------------*\
 |* Misc
 \*----------------------------------------------------------------------------*/
-static __inline__ void * DEFAULT_FN_ATTRS
+static __inline__ void * __DEFAULT_FN_ATTRS
 _AddressOfReturnAddress(void) {
   return (void*)((char*)__builtin_frame_address(0) + sizeof(void*));
 }
-static __inline__ void * DEFAULT_FN_ATTRS
+static __inline__ void * __DEFAULT_FN_ATTRS
 _ReturnAddress(void) {
   return __builtin_return_address(0);
 }
 #if defined(__i386__) || defined(__x86_64__)
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __cpuid(int __info[4], int __level) {
   __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
                    : "a"(__level));
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __cpuidex(int __info[4], int __level, int __ecx) {
   __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3])
                    : "a"(__level), "c"(__ecx));
 }
-static __inline__ unsigned __int64 __cdecl DEFAULT_FN_ATTRS
+static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS
 _xgetbv(unsigned int __xcr_no) {
   unsigned int __eax, __edx;
   __asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no));
   return ((unsigned __int64)__edx << 32) | __eax;
 }
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __halt(void) {
   __asm__ volatile ("hlt");
 }
@@ -933,7 +929,7 @@ __halt(void) {
 |* Privileged intrinsics
 \*----------------------------------------------------------------------------*/
 #if defined(__i386__) || defined(__x86_64__)
-static __inline__ unsigned __int64 DEFAULT_FN_ATTRS
+static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
 __readmsr(unsigned long __register) {
   // Loads the contents of a 64-bit model specific register (MSR) specified in
   // the ECX register into registers EDX:EAX. The EDX register is loaded with
@@ -947,14 +943,14 @@ __readmsr(unsigned long __register) {
   return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
 }
 
-static __inline__ unsigned long DEFAULT_FN_ATTRS
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
 __readcr3(void) {
   unsigned long __cr3_val;
   __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory");
   return __cr3_val;
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 __writecr3(unsigned int __cr3_val) {
   __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory");
 }
@@ -964,7 +960,7 @@ __writecr3(unsigned int __cr3_val) {
 }
 #endif
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __INTRIN_H */
 #endif /* _MSC_VER */
diff --git a/lib/Headers/__wmmintrin_aes.h b/lib/Headers/__wmmintrin_aes.h
index 17b3f1d..81b2b8d 100644
--- a/lib/Headers/__wmmintrin_aes.h
+++ b/lib/Headers/__wmmintrin_aes.h
@@ -26,33 +26,33 @@
 #include <emmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_aesenc_si128(__m128i __V, __m128i __R)
 {
   return (__m128i)__builtin_ia32_aesenc128(__V, __R);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_aesenclast_si128(__m128i __V, __m128i __R)
 {
   return (__m128i)__builtin_ia32_aesenclast128(__V, __R);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_aesdec_si128(__m128i __V, __m128i __R)
 {
   return (__m128i)__builtin_ia32_aesdec128(__V, __R);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_aesdeclast_si128(__m128i __V, __m128i __R)
 {
   return (__m128i)__builtin_ia32_aesdeclast128(__V, __R);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_aesimc_si128(__m128i __V)
 {
   return (__m128i)__builtin_ia32_aesimc128(__V);
@@ -61,6 +61,6 @@ _mm_aesimc_si128(__m128i __V)
 #define _mm_aeskeygenassist_si128(C, R) \
   __builtin_ia32_aeskeygenassist128((C), (R))
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif  /* _WMMINTRIN_AES_H */
diff --git a/lib/Headers/adxintrin.h b/lib/Headers/adxintrin.h
index 050dc8a..ee34728 100644
--- a/lib/Headers/adxintrin.h
+++ b/lib/Headers/adxintrin.h
@@ -29,7 +29,7 @@
 #define __ADXINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
 
 /* Intrinsics that are available only if __ADX__ defined */
 static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__("adx")))
@@ -49,7 +49,7 @@ _addcarryx_u64(unsigned char __cf, unsigned long long __x,
 #endif
 
 /* Intrinsics that are also available if __ADX__ undefined */
-static __inline unsigned char DEFAULT_FN_ATTRS
+static __inline unsigned char __DEFAULT_FN_ATTRS
 _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
               unsigned int *__p)
 {
@@ -57,7 +57,7 @@ _addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
 }
 
 #ifdef __x86_64__
-static __inline unsigned char DEFAULT_FN_ATTRS
+static __inline unsigned char __DEFAULT_FN_ATTRS
 _addcarry_u64(unsigned char __cf, unsigned long long __x,
               unsigned long long __y, unsigned long long  *__p)
 {
@@ -65,7 +65,7 @@ _addcarry_u64(unsigned char __cf, unsigned long long __x,
 }
 #endif
 
-static __inline unsigned char DEFAULT_FN_ATTRS
+static __inline unsigned char __DEFAULT_FN_ATTRS
 _subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
               unsigned int *__p)
 {
@@ -73,7 +73,7 @@ _subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
 }
 
 #ifdef __x86_64__
-static __inline unsigned char DEFAULT_FN_ATTRS
+static __inline unsigned char __DEFAULT_FN_ATTRS
 _subborrow_u64(unsigned char __cf, unsigned long long __x,
                unsigned long long __y, unsigned long long  *__p)
 {
@@ -81,6 +81,6 @@ _subborrow_u64(unsigned char __cf, unsigned long long __x,
 }
 #endif
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __ADXINTRIN_H */
diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h
index 28df890..2c80e24 100644
--- a/lib/Headers/altivec.h
+++ b/lib/Headers/altivec.h
@@ -110,14 +110,28 @@ static vector signed int __ATTRS_o_ai vec_abs(vector signed int __a) {
   return __builtin_altivec_vmaxsw(__a, -__a);
 }
 
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed long long __ATTRS_o_ai
+vec_abs(vector signed long long __a) {
+  return __builtin_altivec_vmaxsd(__a, -__a);
+}
+#endif
+
 static vector float __ATTRS_o_ai vec_abs(vector float __a) {
   vector unsigned int __res =
       (vector unsigned int)__a & (vector unsigned int)(0x7FFFFFFF);
   return (vector float)__res;
 }
 
-/* vec_abss */
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector double __ATTRS_o_ai vec_abs(vector double __a) {
+  vector unsigned long long __res = { 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF };
+  __res &= (vector unsigned int)__a;
+  return (vector double)__res;
+}
+#endif
 
+/* vec_abss */ 
 #define __builtin_altivec_abss_v16qi vec_abss
 #define __builtin_altivec_abss_v8hi vec_abss
 #define __builtin_altivec_abss_v4si vec_abss
@@ -226,6 +240,16 @@ static vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a,
 }
 
 #if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed long long __ATTRS_o_ai
+vec_add(vector signed long long __a, vector signed long long __b) {
+  return __a + __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_add(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a + __b;
+}
+
 static vector signed __int128 __ATTRS_o_ai vec_add(vector signed __int128 __a,
                                                    vector signed __int128 __b) {
   return __a + __b;
@@ -241,6 +265,13 @@ static vector float __ATTRS_o_ai vec_add(vector float __a, vector float __b) {
   return __a + __b;
 }
 
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_add(vector double __a, vector double __b) {
+  return __a + __b;
+}
+#endif // __VSX__
+
 /* vec_vaddubm */
 
 #define __builtin_altivec_vaddubm vec_vaddubm
@@ -746,6 +777,24 @@ static vector float __ATTRS_o_ai vec_and(vector float __a,
 }
 
 #ifdef __VSX__
+static vector double __ATTRS_o_ai vec_and(vector bool long long __a, vector double __b) {
+  vector unsigned long long __res =
+      (vector unsigned long long)__a & (vector unsigned long long)__b;
+  return (vector double)__res;
+}
+
+static vector double __ATTRS_o_ai vec_and(vector double __a, vector bool long long __b) {
+  vector unsigned long long __res =
+      (vector unsigned long long)__a & (vector unsigned long long)__b;
+  return (vector double)__res;
+}
+
+static vector double __ATTRS_o_ai vec_and(vector double __a, vector double __b) {
+  vector unsigned long long __res =
+      (vector unsigned long long)__a & (vector unsigned long long)__b;
+  return (vector double)__res;
+}
+
 static vector signed long long __ATTRS_o_ai
 vec_and(vector signed long long __a, vector signed long long __b) {
   return __a & __b;
@@ -1068,6 +1117,26 @@ static vector float __ATTRS_o_ai vec_andc(vector float __a,
 }
 
 #ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_andc(vector bool long long __a, vector double __b) {
+  vector unsigned long long __res =
+      (vector unsigned long long)__a & ~(vector unsigned long long)__b;
+  return (vector double)__res;
+}
+
+static vector double __ATTRS_o_ai
+vec_andc(vector double __a, vector bool long long __b) {
+  vector unsigned long long __res =
+      (vector unsigned long long)__a & ~(vector unsigned long long)__b;
+  return (vector double)__res;
+}
+
+static vector double __ATTRS_o_ai vec_andc(vector double __a, vector double __b) {
+  vector unsigned long long __res =
+      (vector unsigned long long)__a & ~(vector unsigned long long)__b;
+  return (vector double)__res;
+}
+
 static vector signed long long __ATTRS_o_ai
 vec_andc(vector signed long long __a, vector signed long long __b) {
   return __a & ~__b;
@@ -1338,11 +1407,20 @@ vec_vavguw(vector unsigned int __a, vector unsigned int __b) {
 
 /* vec_ceil */
 
-static vector float __attribute__((__always_inline__))
-vec_ceil(vector float __a) {
+static vector float __ATTRS_o_ai vec_ceil(vector float __a) {
+#ifdef __VSX__
+  return __builtin_vsx_xvrspip(__a);
+#else
   return __builtin_altivec_vrfip(__a);
+#endif
 }
 
+#ifdef __VSX__
+static vector double __ATTRS_o_ai vec_ceil(vector double __a) {
+  return __builtin_vsx_xvrdpip(__a);
+}
+#endif
+
 /* vec_vrfip */
 
 static vector float __attribute__((__always_inline__))
@@ -1414,16 +1492,56 @@ vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) {
 
 static vector bool int __ATTRS_o_ai vec_cmpeq(vector float __a,
                                               vector float __b) {
+#ifdef __VSX__
+  return (vector bool int)__builtin_vsx_xvcmpeqsp(__a, __b);
+#else
   return (vector bool int)__builtin_altivec_vcmpeqfp(__a, __b);
+#endif
+}
+
+#ifdef __VSX__
+static vector bool long long __ATTRS_o_ai
+vec_cmpeq(vector double __a, vector double __b) {
+  return (vector bool long long)__builtin_vsx_xvcmpeqdp(__a, __b);
 }
+#endif
 
 /* vec_cmpge */
 
-static vector bool int __attribute__((__always_inline__))
+static vector bool int __ATTRS_o_ai
 vec_cmpge(vector float __a, vector float __b) {
+#ifdef __VSX__
+  return (vector bool int)__builtin_vsx_xvcmpgesp(__a, __b);
+#else
   return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
+#endif
 }
 
+#ifdef __VSX__
+static vector bool long long __ATTRS_o_ai
+vec_cmpge(vector double __a, vector double __b) {
+  return (vector bool long long)__builtin_vsx_xvcmpgedp(__a, __b);
+}
+#endif
+
+#ifdef __POWER8_VECTOR__
+/*  Forwrad declarations as the functions are used here */
+static vector bool long long __ATTRS_o_ai
+vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b);
+static vector bool long long __ATTRS_o_ai
+vec_cmpgt(vector signed long long __a, vector signed long long __b);
+
+static vector bool long long __ATTRS_o_ai
+vec_cmpge(vector signed long long __a, vector signed long long __b) {
+  return ~(vec_cmpgt(__b, __a));
+}
+
+static vector bool long long __ATTRS_o_ai
+vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) {
+  return ~(vec_cmpgt(__b, __a));
+}
+#endif
+
 /* vec_vcmpgefp */
 
 static vector bool int __attribute__((__always_inline__))
@@ -1476,9 +1594,19 @@ vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) {
 
 static vector bool int __ATTRS_o_ai vec_cmpgt(vector float __a,
                                               vector float __b) {
+#ifdef __VSX__
+  return (vector bool int)__builtin_vsx_xvcmpgtsp(__a, __b);
+#else
   return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b);
+#endif
 }
 
+#ifdef __VSX__
+static vector bool long long __ATTRS_o_ai
+vec_cmpgt(vector double __a, vector double __b) {
+  return (vector bool long long)__builtin_vsx_xvcmpgtdp(__a, __b);
+}
+#endif
 /* vec_vcmpgtsb */
 
 static vector bool char __attribute__((__always_inline__))
@@ -1530,47 +1658,85 @@ vec_vcmpgtfp(vector float __a, vector float __b) {
 
 /* vec_cmple */
 
-static vector bool int __attribute__((__always_inline__))
+static vector bool int __ATTRS_o_ai
 vec_cmple(vector float __a, vector float __b) {
-  return (vector bool int)__builtin_altivec_vcmpgefp(__b, __a);
+  return vec_cmpge(__b, __a);
 }
 
+#ifdef __VSX__
+static vector bool long long __ATTRS_o_ai
+vec_cmple(vector double __a, vector double __b) {
+  return vec_cmpge(__b, __a);
+}
+#endif
+
+#ifdef __POWER8_VECTOR__
+static vector bool long long __ATTRS_o_ai
+vec_cmple(vector signed long long __a, vector signed long long __b) {
+  return vec_cmpge(__b, __a);
+}
+
+static vector bool long long __ATTRS_o_ai
+vec_cmple(vector unsigned long long __a, vector unsigned long long __b) {
+  return vec_cmpge(__b, __a);
+}
+#endif
+
 /* vec_cmplt */
 
 static vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a,
                                                vector signed char __b) {
-  return (vector bool char)__builtin_altivec_vcmpgtsb(__b, __a);
+  return vec_cmpgt(__b, __a);
 }
 
 static vector bool char __ATTRS_o_ai vec_cmplt(vector unsigned char __a,
                                                vector unsigned char __b) {
-  return (vector bool char)__builtin_altivec_vcmpgtub(__b, __a);
+  return vec_cmpgt(__b, __a);
 }
 
 static vector bool short __ATTRS_o_ai vec_cmplt(vector short __a,
                                                 vector short __b) {
-  return (vector bool short)__builtin_altivec_vcmpgtsh(__b, __a);
+  return vec_cmpgt(__b, __a);
 }
 
 static vector bool short __ATTRS_o_ai vec_cmplt(vector unsigned short __a,
                                                 vector unsigned short __b) {
-  return (vector bool short)__builtin_altivec_vcmpgtuh(__b, __a);
+  return vec_cmpgt(__b, __a);
 }
 
 static vector bool int __ATTRS_o_ai vec_cmplt(vector int __a, vector int __b) {
-  return (vector bool int)__builtin_altivec_vcmpgtsw(__b, __a);
+  return vec_cmpgt(__b, __a);
 }
 
 static vector bool int __ATTRS_o_ai vec_cmplt(vector unsigned int __a,
                                               vector unsigned int __b) {
-  return (vector bool int)__builtin_altivec_vcmpgtuw(__b, __a);
+  return vec_cmpgt(__b, __a);
 }
 
 static vector bool int __ATTRS_o_ai vec_cmplt(vector float __a,
                                               vector float __b) {
-  return (vector bool int)__builtin_altivec_vcmpgtfp(__b, __a);
+  return vec_cmpgt(__b, __a);
+}
+
+#ifdef __VSX__
+static vector bool long long __ATTRS_o_ai
+vec_cmplt(vector double __a, vector double __b) {
+  return vec_cmpgt(__b, __a);
+}
+#endif
+
+#ifdef __POWER8_VECTOR__
+static vector bool long long __ATTRS_o_ai
+vec_cmplt(vector signed long long __a, vector signed long long __b) {
+  return vec_cmpgt(__b, __a);
 }
 
+static vector bool long long __ATTRS_o_ai
+vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) {
+  return vec_cmpgt(__b, __a);
+}
+#endif
+
 /* vec_ctf */
 
 static vector float __ATTRS_o_ai vec_ctf(vector int __a, int __b) {
@@ -3014,6 +3180,56 @@ static vector float __ATTRS_o_ai vec_vmrglw(vector float __a,
                                          0x1C, 0x1D, 0x1E, 0x1F));
 }
 
+
+#ifdef __POWER8_VECTOR__
+/* vec_mergee */
+
+static vector bool int __ATTRS_o_ai
+vec_mergee(vector bool int __a, vector bool int __b) {
+  return vec_perm(__a, __b, (vector unsigned char)
+                  (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 
+                   0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
+}
+
+static vector signed int __ATTRS_o_ai
+vec_mergee(vector signed int __a, vector signed int __b) {
+  return vec_perm(__a, __b, (vector unsigned char)
+                  (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 
+                   0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_mergee(vector unsigned int __a, vector unsigned int __b) {
+  return vec_perm(__a, __b, (vector unsigned char)
+                  (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 
+                   0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B));
+}
+
+/* vec_mergeo */
+
+static vector bool int  __ATTRS_o_ai
+vec_mergeo(vector bool int __a, vector bool int __b) {
+  return vec_perm(__a, __b, (vector unsigned char)
+                  (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
+                   0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+}
+
+static vector signed int  __ATTRS_o_ai
+vec_mergeo(vector signed int __a, vector signed int __b) {
+  return vec_perm(__a, __b, (vector unsigned char)
+                  (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
+                   0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+}
+
+static vector unsigned int  __ATTRS_o_ai
+vec_mergeo(vector unsigned int __a, vector unsigned int __b) {
+  return vec_perm(__a, __b, (vector unsigned char)
+                  (0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17,
+                   0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+}
+
+#endif
+
 /* vec_mfvscr */
 
 static vector unsigned short __attribute__((__always_inline__))
diff --git a/lib/Headers/ammintrin.h b/lib/Headers/ammintrin.h
index 3f38205..91c6333 100644
--- a/lib/Headers/ammintrin.h
+++ b/lib/Headers/ammintrin.h
@@ -27,7 +27,7 @@
 #include <pmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))
 
 /// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
 ///    integer vector operand at the index idx and of the length len.
@@ -80,7 +80,7 @@
 ///    non-zero, the result is undefined. 
 /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted 
 ///    from the source operand.
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_extract_si64(__m128i __x, __m128i __y)
 {
   return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
@@ -156,7 +156,7 @@ _mm_extract_si64(__m128i __x, __m128i __y)
 ///    lower bits of source operand __y. The upper 64 bits of the return value 
 ///    are undefined.
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_insert_si64(__m128i __x, __m128i __y)
 {
   return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
@@ -177,7 +177,7 @@ _mm_insert_si64(__m128i __x, __m128i __y)
 /// \param __a
 ///    The 64-bit double-precision floating-point register value to
 ///    be stored.
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_sd(double *__p, __m128d __a)
 {
   __builtin_ia32_movntsd(__p, (__v2df)__a);
@@ -198,12 +198,12 @@ _mm_stream_sd(double *__p, __m128d __a)
 /// \param __a
 ///    The 32-bit single-precision floating-point register value to
 ///    be stored.
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_ss(float *__p, __m128 __a)
 {
   __builtin_ia32_movntss(__p, (__v4sf)__a);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __AMMINTRIN_H */
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index 5f1817e..cfa9141 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -29,96 +29,96 @@
 #define __AVX2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))
 
 /* SSE4 Multiple Packed Sums of Absolute Difference.  */
 #define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_abs_epi8(__m256i __a)
 {
     return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_abs_epi16(__m256i __a)
 {
     return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_abs_epi32(__m256i __a)
 {
     return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_packs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_packs_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_packus_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_packus_epi32(__m256i __V1, __m256i __V2)
 {
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qi)__a + (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a + (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a + (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return __a + __b;
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_adds_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_adds_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_adds_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
@@ -129,31 +129,31 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
   __m256i __b = (b); \
   (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_and_si256(__m256i __a, __m256i __b)
 {
   return __a & __b;
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_andnot_si256(__m256i __a, __m256i __b)
 {
   return ~__a & __b;
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_avg_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_avg_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
 {
   return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
@@ -181,307 +181,307 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
                                    (((M) & 0x40) ? 30 : 14), \
                                    (((M) & 0x80) ? 31 : 15)); })
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qi)__a == (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a == (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a == (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)(__a == __b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qi)__a > (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a > (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a > (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)(__a > __b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_hadd_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_hadd_epi32(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_hadds_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_hsub_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_hsub_epi32(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_hsubs_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maddubs_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_madd_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_max_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_max_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_max_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_max_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_max_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_max_epu32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_min_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_min_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_min_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_min_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_min_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_min_epu32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm256_movemask_epi8(__m256i __a)
 {
   return __builtin_ia32_pmovmskb256((__v32qi)__a);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi16(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi32(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi64(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi16_epi32(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi16_epi64(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_epi64(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi16(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi32(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi64(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi32(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi64(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu32_epi64(__m128i __V)
 {
   return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
 }
 
-static __inline__  __m256i DEFAULT_FN_ATTRS
+static __inline__  __m256i __DEFAULT_FN_ATTRS
 _mm256_mul_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mulhi_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mulhi_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mullo_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a * (__v16hi)__b);
 }
 
-static __inline__  __m256i DEFAULT_FN_ATTRS
+static __inline__  __m256i __DEFAULT_FN_ATTRS
 _mm256_mullo_epi32 (__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a * (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mul_epu32(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_or_si256(__m256i __a, __m256i __b)
 {
   return __a | __b;
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sad_epu8(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_shuffle_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
@@ -523,19 +523,19 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
                                    8 + (((imm) & 0xc0) >> 6), \
                                    12, 13, 14, 15); })
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sign_epi8(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sign_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sign_epi32(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
@@ -547,61 +547,61 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
 
 #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_slli_epi16(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sll_epi16(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_slli_epi32(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sll_epi32(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_slli_epi64(__m256i __a, int __count)
 {
   return __builtin_ia32_psllqi256(__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sll_epi64(__m256i __a, __m128i __count)
 {
   return __builtin_ia32_psllq256(__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srai_epi16(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sra_epi16(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srai_epi32(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sra_epi32(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
@@ -613,175 +613,175 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
 
 #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srli_epi16(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srl_epi16(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srli_epi32(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srl_epi32(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srli_epi64(__m256i __a, int __count)
 {
   return __builtin_ia32_psrlqi256(__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srl_epi64(__m256i __a, __m128i __count)
 {
   return __builtin_ia32_psrlq256(__a, __count);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sub_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qi)__a - (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sub_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a - (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sub_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a - (__v8si)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sub_epi64(__m256i __a, __m256i __b)
 {
   return __a - __b;
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_subs_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_subs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_subs_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_subs_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_xor_si256(__m256i __a, __m256i __b)
 {
   return __a ^ __b;
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_stream_load_si256(__m256i *__V)
 {
   return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_broadcastss_ps(__m128 __X)
 {
   return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_broadcastsd_pd(__m128d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 0);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_broadcastss_ps(__m128 __X)
 {
   return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_broadcastsd_pd(__m128d __X)
 {
   return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastsi128_si256(__m128i __X)
 {
   return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1);
@@ -809,56 +809,56 @@ _mm256_broadcastsi128_si256(__m128i __X)
                                    (((M) & 0x40) ? 14 : 6), \
                                    (((M) & 0x80) ? 15 : 7)); })
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastb_epi8(__m128i __X)
 {
   return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastw_epi16(__m128i __X)
 {
   return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastd_epi32(__m128i __X)
 {
   return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_broadcastq_epi64(__m128i __X)
 {
   return (__m256i)__builtin_ia32_pbroadcastq256(__X);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastb_epi8(__m128i __X)
 {
   return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastw_epi16(__m128i __X)
 {
   return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X);
 }
 
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastd_epi32(__m128i __X)
 {
   return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_broadcastq_epi64(__m128i __X)
 {
   return (__m128i)__builtin_ia32_pbroadcastq128(__X);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
@@ -870,7 +870,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
                                    (M) & 0x3, ((M) & 0xc) >> 2, \
                                    ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
 {
   return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
@@ -903,109 +903,109 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
     (((M) & 1) ? 4 : 2), \
     (((M) & 1) ? 5 : 3) );})
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskload_epi32(int const *__X, __m256i __M)
 {
   return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskload_epi64(long long const *__X, __m256i __M)
 {
   return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskload_epi32(int const *__X, __m128i __M)
 {
   return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskload_epi64(long long const *__X, __m128i __M)
 {
   return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
 {
   __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
 {
   __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
 {
   __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
 {
   __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sllv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv4di(__X, __Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sllv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv2di(__X, __Y);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srav_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srav_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srlv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv4di(__X, __Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srlv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
@@ -1251,6 +1251,6 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
              (const __v4di *)__m, (__v4di)__i, \
              (__v4di)_mm256_set1_epi64x(-1), (s)); })
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __AVX2INTRIN_H */
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index 4eb9747..b0d3462 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -34,9 +34,9 @@ typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
 
-static  __inline __v64qi DEFAULT_FN_ATTRS
+static  __inline __v64qi __DEFAULT_FN_ATTRS
 _mm512_setzero_qi (void) {
   return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0,
@@ -48,7 +48,7 @@ _mm512_setzero_qi (void) {
                        0, 0, 0, 0, 0, 0, 0, 0 };
 }
 
-static  __inline __v32hi DEFAULT_FN_ATTRS
+static  __inline __v32hi __DEFAULT_FN_ATTRS
 _mm512_setzero_hi (void) {
   return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0,
@@ -58,300 +58,300 @@ _mm512_setzero_hi (void) {
 
 /* Integer compare */
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
                                                    (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
                                                  (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
                                                    (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
                                                  __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
                                                 (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
                                                  (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
                                                  __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
                                                    (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
                                                  (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
                                                    (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
                                                  __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
                                                 (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
                                                  (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
                                                  __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
                                                 (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
                                                  (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
                                                  __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
                                                 (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
                                                  (__mmask64)-1);
 }
 
-static __inline__ __mmask64 DEFAULT_FN_ATTRS
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
   return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
                                                  __u);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_add_epi8 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qi) __A + (__v64qi) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
              (__v64qi) __B,
@@ -359,7 +359,7 @@ _mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
              (__mmask64) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
              (__v64qi) __B,
@@ -368,12 +368,12 @@ _mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
              (__mmask64) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_sub_epi8 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qi) __A - (__v64qi) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
              (__v64qi) __B,
@@ -381,7 +381,7 @@ _mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
              (__mmask64) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
              (__v64qi) __B,
@@ -390,12 +390,12 @@ _mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
              (__mmask64) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_add_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hi) __A + (__v32hi) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
              (__v32hi) __B,
@@ -403,7 +403,7 @@ _mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
              (__mmask32) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
              (__v32hi) __B,
@@ -412,12 +412,12 @@ _mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
              (__mmask32) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_sub_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hi) __A - (__v32hi) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
              (__v32hi) __B,
@@ -425,7 +425,7 @@ _mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
              (__mmask32) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
              (__v32hi) __B,
@@ -434,12 +434,12 @@ _mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
              (__mmask32) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hi) __A * (__v32hi) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
               (__v32hi) __B,
@@ -447,7 +447,7 @@ _mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
               (__mmask32) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
               (__v32hi) __B,
@@ -456,6 +456,754 @@ _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
               (__mmask32) __U);
 }
 
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
+{
+  return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
+{
+  return (__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) __A,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_abs_epi8 (__m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_abs_epi16 (__m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_packs_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_packs_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v32hi) _mm512_setzero_hi(),
+              __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_packs_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
+       __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v32hi) __W,
+              __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_packs_epi16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_packs_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
+       __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v64qi) __W,
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_packs_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v64qi) _mm512_setzero_qi(),
+              __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_packus_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_packus_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v32hi) _mm512_setzero_hi(),
+              __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
+        __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v32hi) __W,
+              __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_packus_epi16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_packus_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
+        __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v64qi) __W,
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_packus_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v64qi) _mm512_setzero_qi(),
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_adds_epi8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_adds_epi16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+      __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_adds_epu8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_adds_epu16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+      __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_avg_epu8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+          __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi(),
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_avg_epu16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi(),
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_max_epi8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi(),
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
+          __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_max_epi16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi(),
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_max_epu8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi(),
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
+          __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_max_epu16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi(),
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_min_epi8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi(),
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
+          __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_min_epi16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi(),
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_min_epu8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi(),
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
+          __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_min_epu16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi(),
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_shuffle_epi8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_shuffle_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+        __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_shuffle_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_subs_epi8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_subs_epi16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+      __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_subs_epu8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+           __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) __W,
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+              (__v64qi) __B,
+              (__v64qi) _mm512_setzero_qi (),
+              (__mmask64) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_subs_epu16 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+      __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) _mm512_setzero_hi (),
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask2_permutex2var_epi16 (__m512i __A, __m512i __I,
+         __mmask32 __U, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
+              (__v32hi) __I /* idx */ ,
+              (__v32hi) __B,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              (__mmask32) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutex2var_epi16 (__m512i __A, __mmask32 __U,
+        __m512i __I, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I /* idx */,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A,
+         __m512i __I, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpermt2varhi512_maskz ((__v32hi) __I
+              /* idx */ ,
+              (__v32hi) __A,
+              (__v32hi) __B,
+              (__mmask32) __U);
+}
+
 #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
   (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
@@ -496,6 +1244,7 @@ _mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
                                           (__v32hi)(__m512i)(b), \
                                           (p), (__mmask32)(m)); })
 
-#undef DEFAULT_FN_ATTRS
+
+#undef __DEFAULT_FN_ATTRS
 
 #endif
diff --git a/lib/Headers/avx512cdintrin.h b/lib/Headers/avx512cdintrin.h
new file mode 100644
index 0000000..3894b29
--- /dev/null
+++ b/lib/Headers/avx512cdintrin.h
@@ -0,0 +1,131 @@
+/*===------------- avx512cdintrin.h - AVX512CD intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512CDINTRIN_H
+#define __AVX512CDINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd")))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_conflict_epi64 (__m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
+                 (__v8di) _mm512_setzero_si512 (),
+                 (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
+               (__v8di) __W,
+               (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
+                 (__v8di) _mm512_setzero_si512 (),
+                 (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_conflict_epi32 (__m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
+                 (__v16si) _mm512_setzero_si512 (),
+                 (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
+               (__v16si) __W,
+               (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
+                 (__v16si) _mm512_setzero_si512 (),
+                 (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_lzcnt_epi32 (__m512i __A)
+{
+  return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
+             (__v16si) _mm512_setzero_si512 (),
+             (__mmask16) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
+                 (__v16si) __W,
+                 (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
+             (__v16si) _mm512_setzero_si512 (),
+             (__mmask16) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_lzcnt_epi64 (__m512i __A)
+{
+  return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
+             (__v8di) _mm512_setzero_si512 (),
+             (__mmask8) -1);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
+                 (__v8di) __W,
+                 (__mmask8) __U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
+             (__v8di) _mm512_setzero_si512 (),
+             (__mmask8) __U);
+}
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h
index cfcfc62..8a69f7f 100644
--- a/lib/Headers/avx512dqintrin.h
+++ b/lib/Headers/avx512dqintrin.h
@@ -29,14 +29,14 @@
 #define __AVX512DQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v8di) __A * (__v8di) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
               (__v8di) __B,
@@ -44,7 +44,7 @@ _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
               (__v8di) __B,
@@ -53,12 +53,12 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_xor_pd (__m512d __A, __m512d __B) {
   return (__m512d) ((__v8di) __A ^ (__v8di) __B);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
              (__v8df) __B,
@@ -66,7 +66,7 @@ _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
              (__v8df) __B,
@@ -75,12 +75,12 @@ _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_xor_ps (__m512 __A, __m512 __B) {
   return (__m512) ((__v16si) __A ^ (__v16si) __B);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
             (__v16sf) __B,
@@ -88,7 +88,7 @@ _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
             (__mmask16) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
             (__v16sf) __B,
@@ -97,12 +97,12 @@ _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
             (__mmask16) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_or_pd (__m512d __A, __m512d __B) {
   return (__m512d) ((__v8di) __A | (__v8di) __B);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
             (__v8df) __B,
@@ -110,7 +110,7 @@ _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
             (__v8df) __B,
@@ -119,12 +119,12 @@ _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_or_ps (__m512 __A, __m512 __B) {
   return (__m512) ((__v16si) __A | (__v16si) __B);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
                  (__v16sf) __B,
@@ -132,7 +132,7 @@ _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
                  (__mmask16) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
                  (__v16sf) __B,
@@ -141,12 +141,12 @@ _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
                  (__mmask16) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_and_pd (__m512d __A, __m512d __B) {
   return (__m512d) ((__v8di) __A & (__v8di) __B);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
              (__v8df) __B,
@@ -154,7 +154,7 @@ _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
              (__v8df) __B,
@@ -163,12 +163,12 @@ _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_and_ps (__m512 __A, __m512 __B) {
   return (__m512) ((__v16si) __A & (__v16si) __B);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
             (__v16sf) __B,
@@ -176,7 +176,7 @@ _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
             (__mmask16) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
             (__v16sf) __B,
@@ -185,7 +185,7 @@ _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
             (__mmask16) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_andnot_pd (__m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
               (__v8df) __B,
@@ -194,7 +194,7 @@ _mm512_andnot_pd (__m512d __A, __m512d __B) {
               (__mmask8) -1);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
               (__v8df) __B,
@@ -202,7 +202,7 @@ _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
               (__v8df) __B,
@@ -211,7 +211,7 @@ _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_andnot_ps (__m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
              (__v16sf) __B,
@@ -220,7 +220,7 @@ _mm512_andnot_ps (__m512 __A, __m512 __B) {
              (__mmask16) -1);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
              (__v16sf) __B,
@@ -228,7 +228,7 @@ _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
              (__v16sf) __B,
@@ -237,6 +237,6 @@ _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
              (__mmask16) __U);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 98eb73b..0991144 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -47,17 +47,17 @@ typedef unsigned short __mmask16;
 #define _MM_FROUND_CUR_DIRECTION    0x04
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
 
 /* Create vectors with repeated elements */
 
-static  __inline __m512i DEFAULT_FN_ATTRS
+static  __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_setzero_si512(void)
 {
   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
 {
   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
@@ -66,7 +66,7 @@ _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
                  __M);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
 {
 #ifdef __x86_64__
@@ -82,45 +82,45 @@ _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
 #endif
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_setzero_ps(void)
 {
   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
 }
-static  __inline __m512d DEFAULT_FN_ATTRS
+static  __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_setzero_pd(void)
 {
   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_set1_ps(float __w)
 {
   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                    __w, __w, __w, __w, __w, __w, __w, __w  };
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_set1_pd(double __w)
 {
   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_set1_epi32(int __s)
 {
   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                              __s, __s, __s, __s, __s, __s, __s, __s };
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_set1_epi64(long long __d)
 {
   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_broadcastss_ps(__m128 __X)
 {
   float __f = __X[0];
@@ -130,7 +130,7 @@ _mm512_broadcastss_ps(__m128 __X)
                     __f, __f, __f, __f };
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_broadcastsd_pd(__m128d __X)
 {
   double __d = __X[0];
@@ -140,39 +140,39 @@ _mm512_broadcastsd_pd(__m128d __X)
 
 /* Cast between vector types */
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_castpd256_pd512(__m256d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_castps256_ps512(__m256 __a)
 {
   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
                                           -1, -1, -1, -1, -1, -1, -1, -1);
 }
 
-static __inline __m128d DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS
 _mm512_castpd512_pd128(__m512d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1);
 }
 
-static __inline __m128 DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS
 _mm512_castps512_ps128(__m512 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
 }
 
 /* Bitwise operators */
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_and_epi32(__m512i __a, __m512i __b)
 {
   return __a & __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
@@ -180,7 +180,7 @@ _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
               (__v16si) __src,
               (__mmask16) __k);
 }
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a,
@@ -190,13 +190,13 @@ _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
               (__mmask16) __k);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_and_epi64(__m512i __a, __m512i __b)
 {
   return __a & __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
@@ -204,7 +204,7 @@ _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
               (__v8di) __src,
               (__mmask8) __k);
 }
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a,
@@ -214,7 +214,7 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
               (__mmask8) __k);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
@@ -224,7 +224,7 @@ _mm512_andnot_epi32 (__m512i __A, __m512i __B)
               (__mmask16) -1);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
@@ -233,7 +233,7 @@ _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
               (__mmask16) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
@@ -243,7 +243,7 @@ _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
               (__mmask16) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
@@ -253,7 +253,7 @@ _mm512_andnot_epi64 (__m512i __A, __m512i __B)
               (__mmask8) -1);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
@@ -261,7 +261,7 @@ _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
               (__v8di) __W, __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
@@ -270,13 +270,13 @@ _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
               _mm512_setzero_pd (),
               __U);
 }
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_or_epi32(__m512i __a, __m512i __b)
 {
   return __a | __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
@@ -284,7 +284,7 @@ _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
               (__v16si) __src,
               (__mmask16) __k);
 }
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a,
@@ -294,13 +294,13 @@ _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
               (__mmask16) __k);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_or_epi64(__m512i __a, __m512i __b)
 {
   return __a | __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
@@ -308,7 +308,7 @@ _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
               (__v8di) __src,
               (__mmask8) __k);
 }
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a,
@@ -318,13 +318,13 @@ _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
               (__mmask8) __k);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_xor_epi32(__m512i __a, __m512i __b)
 {
   return __a ^ __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
@@ -332,7 +332,7 @@ _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
               (__v16si) __src,
               (__mmask16) __k);
 }
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a,
@@ -342,13 +342,13 @@ _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
               (__mmask16) __k);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_xor_epi64(__m512i __a, __m512i __b)
 {
   return __a ^ __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
@@ -356,7 +356,7 @@ _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
               (__v8di) __src,
               (__mmask8) __k);
 }
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a,
@@ -366,68 +366,68 @@ _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
               (__mmask8) __k);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_and_si512(__m512i __a, __m512i __b)
 {
   return __a & __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_or_si512(__m512i __a, __m512i __b)
 {
   return __a | __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_xor_si512(__m512i __a, __m512i __b)
 {
   return __a ^ __b;
 }
 /* Arithmetic */
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_add_pd(__m512d __a, __m512d __b)
 {
   return __a + __b;
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_add_ps(__m512 __a, __m512 __b)
 {
   return __a + __b;
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_mul_pd(__m512d __a, __m512d __b)
 {
   return __a * __b;
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_mul_ps(__m512 __a, __m512 __b)
 {
   return __a * __b;
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_sub_pd(__m512d __a, __m512d __b)
 {
   return __a - __b;
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_sub_ps(__m512 __a, __m512 __b)
 {
   return __a - __b;
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_add_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8di) __A + (__v8di) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
@@ -436,7 +436,7 @@ _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
@@ -446,13 +446,13 @@ _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_sub_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8di) __A - (__v8di) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
@@ -461,7 +461,7 @@ _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
@@ -471,13 +471,13 @@ _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_add_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16si) __A + (__v16si) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
@@ -486,7 +486,7 @@ _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
              (__mmask16) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
@@ -496,13 +496,13 @@ _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
              (__mmask16) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_sub_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16si) __A - (__v16si) __B);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
@@ -511,7 +511,7 @@ _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
              (__mmask16) __U);
 }
 
-static __inline__ __m512i DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
@@ -521,7 +521,7 @@ _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
              (__mmask16) __U);
 }
 
-static  __inline__ __m512d DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_max_pd(__m512d __A, __m512d __B)
 {
   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
@@ -532,7 +532,7 @@ _mm512_max_pd(__m512d __A, __m512d __B)
              _MM_FROUND_CUR_DIRECTION);
 }
 
-static  __inline__ __m512 DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_max_ps(__m512 __A, __m512 __B)
 {
   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
@@ -544,7 +544,7 @@ _mm512_max_ps(__m512 __A, __m512 __B)
 }
 
 static __inline __m512i
-DEFAULT_FN_ATTRS
+__DEFAULT_FN_ATTRS
 _mm512_max_epi32(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
@@ -554,7 +554,7 @@ _mm512_max_epi32(__m512i __A, __m512i __B)
               (__mmask16) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_max_epu32(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
@@ -564,7 +564,7 @@ _mm512_max_epu32(__m512i __A, __m512i __B)
               (__mmask16) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_max_epi64(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
@@ -574,7 +574,7 @@ _mm512_max_epi64(__m512i __A, __m512i __B)
               (__mmask8) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_max_epu64(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
@@ -584,7 +584,7 @@ _mm512_max_epu64(__m512i __A, __m512i __B)
               (__mmask8) -1);
 }
 
-static  __inline__ __m512d DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_min_pd(__m512d __A, __m512d __B)
 {
   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
@@ -595,7 +595,7 @@ _mm512_min_pd(__m512d __A, __m512d __B)
              _MM_FROUND_CUR_DIRECTION);
 }
 
-static  __inline__ __m512 DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_min_ps(__m512 __A, __m512 __B)
 {
   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
@@ -607,7 +607,7 @@ _mm512_min_ps(__m512 __A, __m512 __B)
 }
 
 static __inline __m512i
-DEFAULT_FN_ATTRS
+__DEFAULT_FN_ATTRS
 _mm512_min_epi32(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
@@ -617,7 +617,7 @@ _mm512_min_epi32(__m512i __A, __m512i __B)
               (__mmask16) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_min_epu32(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
@@ -627,7 +627,7 @@ _mm512_min_epu32(__m512i __A, __m512i __B)
               (__mmask16) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_min_epi64(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
@@ -637,7 +637,7 @@ _mm512_min_epi64(__m512i __A, __m512i __B)
               (__mmask8) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_min_epu64(__m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
@@ -647,7 +647,7 @@ _mm512_min_epu64(__m512i __A, __m512i __B)
               (__mmask8) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mul_epi32(__m512i __X, __m512i __Y)
 {
   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
@@ -657,7 +657,7 @@ _mm512_mul_epi32(__m512i __X, __m512i __Y)
               (__mmask8) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
@@ -665,7 +665,7 @@ _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
               (__v8di) __W, __M);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
@@ -675,7 +675,7 @@ _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
               __M);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mul_epu32(__m512i __X, __m512i __Y)
 {
   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
@@ -685,7 +685,7 @@ _mm512_mul_epu32(__m512i __X, __m512i __Y)
                (__mmask8) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
@@ -693,7 +693,7 @@ _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
                (__v8di) __W, __M);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
@@ -703,13 +703,13 @@ _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
                __M);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16si) __A * (__v16si) __B);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
@@ -719,7 +719,7 @@ _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
               __M);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
@@ -727,7 +727,7 @@ _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
               (__v16si) __W, __M);
 }
 
-static  __inline__ __m512d DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_sqrt_pd(__m512d a)
 {
   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
@@ -736,7 +736,7 @@ _mm512_sqrt_pd(__m512d a)
                                                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static  __inline__ __m512 DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_sqrt_ps(__m512 a)
 {
   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
@@ -745,7 +745,7 @@ _mm512_sqrt_ps(__m512 a)
                                                _MM_FROUND_CUR_DIRECTION);
 }
 
-static  __inline__ __m512d DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_rsqrt14_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -753,7 +753,7 @@ _mm512_rsqrt14_pd(__m512d __A)
                  _mm512_setzero_pd (),
                  (__mmask8) -1);}
 
-static  __inline__ __m512 DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_rsqrt14_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -762,7 +762,7 @@ _mm512_rsqrt14_ps(__m512 __A)
                 (__mmask16) -1);
 }
 
-static  __inline__ __m128 DEFAULT_FN_ATTRS
+static  __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -772,7 +772,7 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B)
              (__mmask8) -1);
 }
 
-static  __inline__ __m128d DEFAULT_FN_ATTRS
+static  __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
@@ -782,7 +782,7 @@ _mm_rsqrt14_sd(__m128d __A, __m128d __B)
               (__mmask8) -1);
 }
 
-static  __inline__ __m512d DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_rcp14_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -791,7 +791,7 @@ _mm512_rcp14_pd(__m512d __A)
                (__mmask8) -1);
 }
 
-static  __inline__ __m512 DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_rcp14_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -799,7 +799,7 @@ _mm512_rcp14_ps(__m512 __A)
               _mm512_setzero_ps (),
               (__mmask16) -1);
 }
-static  __inline__ __m128 DEFAULT_FN_ATTRS
+static  __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rcp14_ss(__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -809,7 +809,7 @@ _mm_rcp14_ss(__m128 __A, __m128 __B)
                  (__mmask8) -1);
 }
 
-static  __inline__ __m128d DEFAULT_FN_ATTRS
+static  __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_rcp14_sd(__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
@@ -819,7 +819,7 @@ _mm_rcp14_sd(__m128d __A, __m128d __B)
             (__mmask8) -1);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_floor_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -828,7 +828,7 @@ _mm512_floor_ps(__m512 __A)
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_floor_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -837,7 +837,7 @@ _mm512_floor_pd(__m512d __A)
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_ceil_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -846,7 +846,7 @@ _mm512_ceil_ps(__m512 __A)
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_ceil_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -855,7 +855,7 @@ _mm512_ceil_pd(__m512d __A)
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_abs_epi64(__m512i __A)
 {
   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
@@ -864,7 +864,7 @@ _mm512_abs_epi64(__m512i __A)
              (__mmask8) -1);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_abs_epi32(__m512i __A)
 {
   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
@@ -881,75 +881,779 @@ _mm512_abs_epi32(__m512i __A)
   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
                                           -1, _MM_FROUND_CUR_DIRECTION); })
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), (__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), (__v8df) (C), \
+                                             (__mmask8) (U), (R)); })
+
+
+#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), -(__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \
+                                             (__v8df) (B), -(__v8df) (C), \
+                                             (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \
+                                              (__v8df) (B), -(__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
+                                             (__v8df) (B), (__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \
+                                             (__v8df) (B), -(__v8df) (C), \
+                                             (__mmask8) -1, (R)); })
+
+
+#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \
+                                              (__v8df) (B), -(__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d)
-    __builtin_ia32_vfmaddpd512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask8) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d)
-    __builtin_ia32_vfmsubpd512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask8) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    -(__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+                                                    (__v8df) __B,
+                                                    -(__v8df) __C,
+                                                    (__mmask8) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     -(__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
-  return (__m512d)
-    __builtin_ia32_vfnmaddpd512_mask(__A,
-                                     __B,
-                                     __C,
-                                     (__mmask8) -1,
-                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+                                                    (__v8df) __B,
+                                                    (__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
+                                                    (__v8df) __B,
+                                                    -(__v8df) __C,
+                                                    (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
+                                                     (__v8df) __B,
+                                                     -(__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), (__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), (__v16sf) (C), \
+                                            (__mmask16) (U), (R)); })
+
+
+#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), -(__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \
+                                            (__v16sf) (B), -(__v16sf) (C), \
+                                            (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \
+                                             (__v16sf) (B), -(__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
+                                            (__v16sf) (B), (__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \
+                                            (__v16sf) (B), -(__v16sf) (C), \
+                                            (__mmask16) -1, (R)); })
+
+
+#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \
+                                             (__v16sf) (B), -(__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512)
-    __builtin_ia32_vfmaddps512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask16) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512)
-    __builtin_ia32_vfmsubps512_mask(__A,
-                                    __B,
-                                    __C,
-                                    (__mmask16) -1,
-                                    _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   -(__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   -(__v16sf) __C,
+                                                   (__mmask16) __U,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    -(__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
-  return (__m512)
-    __builtin_ia32_vfnmaddps512_mask(__A,
-                                     __B,
-                                     __C,
-                                     (__mmask16) -1,
-                                     _MM_FROUND_CUR_DIRECTION);
+  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   (__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
+                                                   (__v16sf) __B,
+                                                   -(__v16sf) __C,
+                                                   (__mmask16) -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    -(__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), (__v8df) (C), \
+                                                (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), (__v8df) (C), \
+                                                (__mmask8) (U), (R)); })
+
+
+#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \
+                                                 (__v8df) (B), (__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
+                                                 (__v8df) (B), (__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), -(__v8df) (C), \
+                                                (__mmask8) -1, (R)); })
+
+
+#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \
+                                                (__v8df) (B), -(__v8df) (C), \
+                                                (__mmask8) (U), (R)); })
+
+
+#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \
+                                                 (__v8df) (B), -(__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       (__v8df) __C,
+                                                       (__mmask8) -1,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       (__v8df) __C,
+                                                       (__mmask8) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        (__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        (__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       -(__v8df) __C,
+                                                       (__mmask8) -1,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
+                                                       (__v8df) __B,
+                                                       -(__v8df) __C,
+                                                       (__mmask8) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        -(__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), (__v16sf) (C), \
+                                               (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), (__v16sf) (C), \
+                                               (__mmask16) (U), (R)); })
+
+
+#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \
+                                                (__v16sf) (B), (__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
+                                                (__v16sf) (B), (__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), -(__v16sf) (C), \
+                                               (__mmask16) -1, (R)); })
+
+
+#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \
+                                               (__v16sf) (B), -(__v16sf) (C), \
+                                               (__mmask16) (U), (R)); })
+
+
+#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \
+                                                (__v16sf) (B), -(__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      (__v16sf) __C,
+                                                      (__mmask16) -1,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      (__v16sf) __C,
+                                                      (__mmask16) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      -(__v16sf) __C,
+                                                      (__mmask16) -1,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
+                                                      (__v16sf) __B,
+                                                      -(__v16sf) __C,
+                                                      (__mmask16) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       -(__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \
+                                                 (__v8df) (B), (__v8df) (C), \
+                                                 (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
+                                                        (__v8df) __B,
+                                                        (__v8df) __C,
+                                                        (__mmask8) __U,
+                                                        _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \
+                                                (__v16sf) (B), (__v16sf) (C), \
+                                                (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__v16sf) __C,
+                                                       (__mmask16) __U,
+                                                       _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \
+                                              (__v8df) (B), (__v8df) (C), \
+                                              (__mmask8) (U), (R)); })
+
+
+#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
+  (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \
+                                               (__v8df) (B), (__v8df) (C), \
+                                               (__mmask8) (U), (R)); })
+
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
+{
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+                                                     (__v8df) __B,
+                                                     (__v8df) __C,
+                                                     (__mmask8) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
+{
+  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
+                                                      (__v8df) __B,
+                                                      (__v8df) __C,
+                                                      (__mmask8) __U,
+                                                      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \
+                                             (__v16sf) (B), (__v16sf) (C), \
+                                             (__mmask16) (U), (R)); })
+
+
+#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
+  (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \
+                                              (__v16sf) (B), (__v16sf) (C), \
+                                              (__mmask16) (U), (R)); })
+
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
+{
+  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+                                                    (__v16sf) __B,
+                                                    (__v16sf) __C,
+                                                    (__mmask16) __U,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
+{
+  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
+                                                     (__v16sf) __B,
+                                                     (__v16sf) __C,
+                                                     (__mmask16) __U,
+                                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+
+
 /* Vector permutations */
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
 {
   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
@@ -958,7 +1662,7 @@ _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
                                                        (__v16si) __B,
                                                        (__mmask16) -1);
 }
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
 {
   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
@@ -968,7 +1672,7 @@ _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
                                                        (__mmask8) -1);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
 {
   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
@@ -977,7 +1681,7 @@ _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
                                                         (__v8df) __B,
                                                         (__mmask8) -1);
 }
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
 {
   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
@@ -1019,7 +1723,7 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
 
 /* Vector Blend */
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
 {
   return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
@@ -1027,7 +1731,7 @@ _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
                  (__mmask8) __U);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
 {
   return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
@@ -1035,7 +1739,7 @@ _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
                 (__mmask16) __U);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
 {
   return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
@@ -1043,7 +1747,7 @@ _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
                 (__mmask8) __U);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
 {
   return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
@@ -1087,7 +1791,7 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
 
 /* Conversion */
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_cvttps_epu32(__m512 __A)
 {
   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
@@ -1107,7 +1811,7 @@ _mm512_cvttps_epu32(__m512 __A)
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (R)); })
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_pd(__m256i __A)
 {
   return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
@@ -1116,7 +1820,7 @@ _mm512_cvtepi32_pd(__m256i __A)
                 (__mmask8) -1);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_cvtepu32_pd(__m256i __A)
 {
   return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
@@ -1135,7 +1839,7 @@ _mm512_cvtepu32_pd(__m256i __A)
                                             (__v16hi)_mm256_setzero_si256(), \
                                             -1); })
 
-static  __inline __m512 DEFAULT_FN_ATTRS
+static  __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_cvtph_ps(__m256i __A)
 {
   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
@@ -1145,7 +1849,7 @@ _mm512_cvtph_ps(__m256i __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_cvttps_epi32(__m512 a)
 {
   return (__m512i)
@@ -1154,7 +1858,7 @@ _mm512_cvttps_epi32(__m512 a)
                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm512_cvttpd_epi32(__m512d a)
 {
   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
@@ -1194,19 +1898,19 @@ _mm512_cvttpd_epi32(__m512d a)
                                             (__mmask8) -1, (R)); })
 
 /* Unpack and Interleave */
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
 {
   return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
 {
   return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
 {
   return __builtin_shufflevector(__a, __b,
@@ -1216,7 +1920,7 @@ _mm512_unpackhi_ps(__m512 __a, __m512 __b)
                                  2+12, 18+12, 3+12, 19+12);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
 {
   return __builtin_shufflevector(__a, __b,
@@ -1228,7 +1932,7 @@ _mm512_unpacklo_ps(__m512 __a, __m512 __b)
 
 /* Bit Test */
 
-static __inline __mmask16 DEFAULT_FN_ATTRS
+static __inline __mmask16 __DEFAULT_FN_ATTRS
 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
 {
   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
@@ -1236,7 +1940,7 @@ _mm512_test_epi32_mask(__m512i __A, __m512i __B)
             (__mmask16) -1);
 }
 
-static __inline __mmask8 DEFAULT_FN_ATTRS
+static __inline __mmask8 __DEFAULT_FN_ATTRS
 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
 {
   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
@@ -1246,7 +1950,7 @@ _mm512_test_epi64_mask(__m512i __A, __m512i __B)
 
 /* SIMD load ops */
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
@@ -1255,7 +1959,7 @@ _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
                                                      (__mmask16) __U);
 }
 
-static __inline __m512i DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
@@ -1264,7 +1968,7 @@ _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
                                                      (__mmask8) __U);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
@@ -1273,7 +1977,7 @@ _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
                                                   (__mmask16) __U);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
@@ -1282,7 +1986,7 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
                                                    (__mmask8) __U);
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
@@ -1291,7 +1995,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
                                                   (__mmask16) __U);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
@@ -1300,7 +2004,7 @@ _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
                                                    (__mmask8) __U);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_loadu_pd(double const *__p)
 {
   struct __loadu_pd {
@@ -1309,7 +2013,7 @@ _mm512_loadu_pd(double const *__p)
   return ((struct __loadu_pd*)__p)->__v;
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_loadu_ps(float const *__p)
 {
   struct __loadu_ps {
@@ -1318,7 +2022,7 @@ _mm512_loadu_ps(float const *__p)
   return ((struct __loadu_ps*)__p)->__v;
 }
 
-static __inline __m512 DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_load_ps(double const *__p)
 {
   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
@@ -1327,7 +2031,7 @@ _mm512_load_ps(double const *__p)
                                                   (__mmask16) -1);
 }
 
-static __inline __m512d DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_load_pd(float const *__p)
 {
   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
@@ -1338,65 +2042,65 @@ _mm512_load_pd(float const *__p)
 
 /* SIMD store ops */
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
 {
   __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
                                      (__mmask8) __U);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
 {
   __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
                                      (__mmask16) __U);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
 {
   __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_storeu_pd(void *__P, __m512d __A)
 {
   __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
 {
   __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
                                    (__mmask16) __U);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_storeu_ps(void *__P, __m512 __A)
 {
   __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
 {
   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_store_pd(void *__P, __m512d __A)
 {
   *(__m512d*)__P = __A;
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
 {
   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                    (__mmask16) __U);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm512_store_ps(void *__P, __m512 __A)
 {
   *(__m512*)__P = __A;
@@ -1404,7 +2108,7 @@ _mm512_store_ps(void *__P, __m512 __A)
 
 /* Mask ops */
 
-static __inline __mmask16 DEFAULT_FN_ATTRS
+static __inline __mmask16 __DEFAULT_FN_ATTRS
 _mm512_knot(__mmask16 __M)
 {
   return __builtin_ia32_knothi(__M);
@@ -1412,289 +2116,289 @@ _mm512_knot(__mmask16 __M)
 
 /* Integer compare */
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                    (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                    __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                    (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                    __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                 __u);
@@ -1748,6 +2452,6 @@ _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
                                          (__mmask8)(m)); })
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif // __AVX512FINTRIN_H
diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h
index 1fbffd4..59849e4 100644
--- a/lib/Headers/avx512vlbwintrin.h
+++ b/lib/Headers/avx512vlbwintrin.h
@@ -29,587 +29,587 @@
 #define __AVX512VLBWINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
 
 /* Integer compare */
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
                                                    (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
                                                    (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
                                                    (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
                                                  __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpge_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpge_epu8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpge_epi16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpge_epu16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
                                                  __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
                                                    (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
                                                    (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
                                                    (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
                                                    __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
                                                  __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmple_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmple_epu8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmple_epi8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmple_epu8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmple_epi16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmple_epu16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmple_epi16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmple_epu16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
                                                  __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmplt_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmplt_epu8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmplt_epi16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmplt_epu16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
                                                  __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
   return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
                                                  __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
                                                 (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
                                                  (__mmask32)-1);
 }
 
-static __inline__ __mmask32 DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
   return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
                                                  __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
                                                 (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
                                                  (__mmask16)-1);
 }
 
-static __inline__ __mmask16 DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
   return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
                                                  __u);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
   return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
              (__v32qi) __B,
@@ -617,7 +617,7 @@ _mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
              (__mmask32) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
              (__v32qi) __B,
@@ -626,7 +626,7 @@ _mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
              (__mmask32) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
              (__v16hi) __B,
@@ -634,7 +634,7 @@ _mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
              (__v16hi) __B,
@@ -643,7 +643,7 @@ _mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
              (__v32qi) __B,
@@ -651,7 +651,7 @@ _mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
              (__mmask32) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
              (__v32qi) __B,
@@ -660,7 +660,7 @@ _mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
              (__mmask32) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
              (__v16hi) __B,
@@ -668,7 +668,7 @@ _mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
              (__v16hi) __B,
@@ -676,7 +676,7 @@ _mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
              _mm256_setzero_si256 (),
              (__mmask16) __U);
 }
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
              (__v16qi) __B,
@@ -684,7 +684,7 @@ _mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
              (__v16qi) __B,
@@ -693,7 +693,7 @@ _mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
              (__v8hi) __B,
@@ -701,7 +701,7 @@ _mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
              (__v8hi) __B,
@@ -710,7 +710,7 @@ _mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
              (__v16qi) __B,
@@ -718,7 +718,7 @@ _mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
              (__v16qi) __B,
@@ -727,7 +727,7 @@ _mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
              (__mmask16) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
              (__v8hi) __B,
@@ -735,7 +735,7 @@ _mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
              (__v8hi) __B,
@@ -744,7 +744,7 @@ _mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
               (__v16hi) __B,
@@ -752,7 +752,7 @@ _mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
               (__mmask16) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
               (__v16hi) __B,
@@ -761,7 +761,7 @@ _mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
               (__mmask16) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
               (__v8hi) __B,
@@ -769,7 +769,7 @@ _mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
               (__v8hi) __B,
@@ -857,6 +857,6 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
                                           (__v16hi)(__m256i)(b), \
                                           (p), (__mmask16)(m)); })
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __AVX512VLBWINTRIN_H */
diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h
index 2a32edd..032070b 100644
--- a/lib/Headers/avx512vldqintrin.h
+++ b/lib/Headers/avx512vldqintrin.h
@@ -29,14 +29,14 @@
 #define __AVX512VLDQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mullo_epi64 (__m256i __A, __m256i __B) {
   return (__m256i) ((__v4di) __A * (__v4di) __B);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
               (__v4di) __B,
@@ -44,7 +44,7 @@ _mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
               (__v4di) __B,
@@ -53,12 +53,12 @@ _mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mullo_epi64 (__m128i __A, __m128i __B) {
   return (__m128i) ((__v2di) __A * (__v2di) __B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
               (__v2di) __B,
@@ -66,7 +66,7 @@ _mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
               (__v2di) __B,
@@ -75,7 +75,7 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
               (__v4df) __B,
@@ -83,7 +83,7 @@ _mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
               (__v4df) __B,
@@ -92,7 +92,7 @@ _mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
               (__v2df) __B,
@@ -100,7 +100,7 @@ _mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
               (__v2df) __B,
@@ -109,7 +109,7 @@ _mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
               (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
              (__v8sf) __B,
@@ -117,7 +117,7 @@ _mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
              (__v8sf) __B,
@@ -126,7 +126,7 @@ _mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
              (__v4sf) __B,
@@ -134,7 +134,7 @@ _mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
              (__v4sf) __B,
@@ -143,7 +143,7 @@ _mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
              (__v4df) __B,
@@ -151,7 +151,7 @@ _mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
              (__v4df) __B,
@@ -160,7 +160,7 @@ _mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
              (__v2df) __B,
@@ -168,7 +168,7 @@ _mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
              (__v2df) __B,
@@ -177,7 +177,7 @@ _mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
             (__v8sf) __B,
@@ -185,7 +185,7 @@ _mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
             (__v8sf) __B,
@@ -194,7 +194,7 @@ _mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
             (__v4sf) __B,
@@ -202,7 +202,7 @@ _mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
             (__v4sf) __B,
@@ -211,7 +211,7 @@ _mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
         __m256d __B) {
   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
@@ -220,7 +220,7 @@ _mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
              (__v4df) __B,
@@ -229,7 +229,7 @@ _mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
              (__v2df) __B,
@@ -237,7 +237,7 @@ _mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
              (__v2df) __B,
@@ -246,7 +246,7 @@ _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
              (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
             (__v8sf) __B,
@@ -254,7 +254,7 @@ _mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
             (__v8sf) __B,
@@ -263,7 +263,7 @@ _mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
             (__v4sf) __B,
@@ -271,7 +271,7 @@ _mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
             (__v4sf) __B,
@@ -280,7 +280,7 @@ _mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
             (__v4df) __B,
@@ -288,7 +288,7 @@ _mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
             (__v4df) __B,
@@ -297,7 +297,7 @@ _mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
             (__v2df) __B,
@@ -305,7 +305,7 @@ _mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
             (__v2df) __B,
@@ -314,7 +314,7 @@ _mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
             (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
                  (__v8sf) __B,
@@ -322,7 +322,7 @@ _mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
                  (__mmask8) __U);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
                  (__v8sf) __B,
@@ -331,7 +331,7 @@ _mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
                  (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
                  (__v4sf) __B,
@@ -339,7 +339,7 @@ _mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
                  (__mmask8) __U);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
                  (__v4sf) __B,
@@ -348,6 +348,6 @@ _mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
                  (__mmask8) __U);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif
diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h
index 59ff5eb..73f46d1 100644
--- a/lib/Headers/avx512vlintrin.h
+++ b/lib/Headers/avx512vlintrin.h
@@ -29,198 +29,198 @@
 #define __AVX512VLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
 
 /* Integer compare */
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
                                                 __u);
 }
 
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
                                                 __u);
@@ -229,391 +229,391 @@ _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
 
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
                                                   (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
                                                   __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
                                                 __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
                                                (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
                                                __u);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
                                                 (__mmask8)-1);
 }
 
-static __inline__ __mmask8 DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS
 _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
   return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
                                                 __u);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -623,7 +623,7 @@ _mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
@@ -633,7 +633,7 @@ _mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -643,7 +643,7 @@ _mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
@@ -653,7 +653,7 @@ _mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -663,7 +663,7 @@ _mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
@@ -673,7 +673,7 @@ _mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -683,7 +683,7 @@ _mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
@@ -693,7 +693,7 @@ _mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -703,7 +703,7 @@ _mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
@@ -713,7 +713,7 @@ _mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -723,7 +723,7 @@ _mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
@@ -733,7 +733,7 @@ _mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -743,7 +743,7 @@ _mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
@@ -753,7 +753,7 @@ _mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -763,7 +763,7 @@ _mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
@@ -773,7 +773,7 @@ _mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
            __m256i __Y)
 {
@@ -782,7 +782,7 @@ _mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
               (__v4di) __W, __M);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
@@ -792,7 +792,7 @@ _mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
               __M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
         __m128i __Y)
 {
@@ -801,7 +801,7 @@ _mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
               (__v2di) __W, __M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
@@ -811,7 +811,7 @@ _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
               __M);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
            __m256i __Y)
 {
@@ -820,7 +820,7 @@ _mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
                (__v4di) __W, __M);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
@@ -830,7 +830,7 @@ _mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
                __M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
         __m128i __Y)
 {
@@ -839,7 +839,7 @@ _mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
                (__v2di) __W, __M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
@@ -849,7 +849,7 @@ _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
                __M);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
@@ -859,7 +859,7 @@ _mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
               __M);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
        __m256i __B)
 {
@@ -868,7 +868,7 @@ _mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
               (__v8si) __W, __M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
@@ -878,7 +878,7 @@ _mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
               __M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
           __m128i __B)
 {
@@ -887,7 +887,7 @@ _mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
               (__v4si) __W, __M);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -897,7 +897,7 @@ _mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
@@ -907,7 +907,7 @@ _mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
@@ -916,7 +916,7 @@ _mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
@@ -926,7 +926,7 @@ _mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
         __m256i __B)
 {
@@ -936,7 +936,7 @@ _mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
               (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
@@ -946,7 +946,7 @@ _mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
               (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
            __m128i __B)
 {
@@ -956,7 +956,7 @@ _mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
               (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
@@ -966,7 +966,7 @@ _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
               (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
           __m256i __B)
 {
@@ -976,7 +976,7 @@ _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
             (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
@@ -986,7 +986,7 @@ _mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
             (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
@@ -995,7 +995,7 @@ _mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
             (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
@@ -1005,7 +1005,7 @@ _mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
             (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -1015,7 +1015,7 @@ _mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
@@ -1025,7 +1025,7 @@ _mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -1035,7 +1035,7 @@ _mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
@@ -1045,7 +1045,7 @@ _mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -1054,7 +1054,7 @@ _mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
              (__v4di) __W, __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
@@ -1064,7 +1064,7 @@ _mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
              __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -1073,7 +1073,7 @@ _mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
              (__v2di) __W, __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
@@ -1083,7 +1083,7 @@ _mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
              __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
         __m256i __B)
 {
@@ -1092,7 +1092,7 @@ _mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
               (__v4di) __W, __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
@@ -1102,7 +1102,7 @@ _mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
               __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
            __m128i __B)
 {
@@ -1111,7 +1111,7 @@ _mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
               (__v2di) __W, __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
@@ -1121,7 +1121,7 @@ _mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
               __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
           __m256i __B)
 {
@@ -1131,7 +1131,7 @@ _mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
             (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
@@ -1141,7 +1141,7 @@ _mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
             (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
@@ -1150,7 +1150,7 @@ _mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
             (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
@@ -1160,7 +1160,7 @@ _mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
             (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
            __m256i __B)
 {
@@ -1170,7 +1170,7 @@ _mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
@@ -1180,7 +1180,7 @@ _mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -1190,7 +1190,7 @@ _mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
              (__mmask8) __U);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
@@ -1320,6 +1320,663 @@ _mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
                                          (__v2df)(__m128)(b), \
                                          (p), (__mmask8)(m)); })
 
-#undef DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+                                                    (__v2df) __B,
+                                                    (__v2df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+                                                    (__v2df) __B,
+                                                    -(__v2df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     -(__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+                                                     (__v2df) __B,
+                                                     -(__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+                                                    (__v4df) __B,
+                                                    (__v4df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+                                                    (__v4df) __B,
+                                                    -(__v4df) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     -(__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+                                                     (__v4df) __B,
+                                                     -(__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+                                                   (__v4sf) __B,
+                                                   (__v4sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+                                                   (__v4sf) __B,
+                                                   -(__v4sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    -(__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    -(__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+                                                   (__v8sf) __B,
+                                                   (__v8sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+                                                   (__v8sf) __B,
+                                                   -(__v8sf) __C,
+                                                   (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    -(__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    -(__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+                                                       (__v2df) __B,
+                                                       (__v2df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        (__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        (__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+                                                       (__v2df) __B,
+                                                       -(__v2df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        -(__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+                                                       (__v4df) __B,
+                                                       (__v4df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        (__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        (__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+                                                       (__v4df) __B,
+                                                       -(__v4df) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        -(__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+                                                      (__v4sf) __B,
+                                                      (__v4sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       (__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       (__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+                                                      (__v4sf) __B,
+                                                      -(__v4sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       -(__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
+                         __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+                                                      (__v8sf) __B,
+                                                      (__v8sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       (__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       (__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+                                                      (__v8sf) __B,
+                                                      -(__v8sf) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       -(__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
+                                                        (__v2df) __B,
+                                                        (__v2df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
+                                                        (__v4df) __B,
+                                                        (__v4df) __C,
+                                                        (__mmask8)
+                                                        __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
+                                                       (__v4sf) __B,
+                                                       (__v4sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
+                                                       (__v8sf) __B,
+                                                       (__v8sf) __C,
+                                                       (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
+                                                     (__v2df) __B,
+                                                     (__v2df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
+                                                      (__v2df) __B,
+                                                      (__v2df) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
+                                                     (__v4df) __B,
+                                                     (__v4df) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
+                                                      (__v4df) __B,
+                                                      (__v4df) __C,
+                                                      (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
+                                                    (__v4sf) __B,
+                                                    (__v4sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
+                                                     (__v4sf) __B,
+                                                     (__v4sf) __C,
+                                                     (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
+                                                    (__v8sf) __B,
+                                                    (__v8sf) __C,
+                                                    (__mmask8) __U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
+                                                     (__v8sf) __B,
+                                                     (__v8sf) __C,
+                                                     (__mmask8) __U);
+}
+
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __AVX512VLINTRIN_H */
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index d28a915..6a9972b 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -40,112 +40,112 @@ typedef double __m256d __attribute__((__vector_size__(32)));
 typedef long long __m256i __attribute__((__vector_size__(32)));
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
 
 /* Arithmetic */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_add_pd(__m256d __a, __m256d __b)
 {
   return __a+__b;
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_add_ps(__m256 __a, __m256 __b)
 {
   return __a+__b;
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_sub_pd(__m256d __a, __m256d __b)
 {
   return __a-__b;
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_sub_ps(__m256 __a, __m256 __b)
 {
   return __a-__b;
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_addsub_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_addsub_ps(__m256 __a, __m256 __b)
 {
   return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_div_pd(__m256d __a, __m256d __b)
 {
   return __a / __b;
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_div_ps(__m256 __a, __m256 __b)
 {
   return __a / __b;
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_max_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_max_ps(__m256 __a, __m256 __b)
 {
   return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_min_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_min_ps(__m256 __a, __m256 __b)
 {
   return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_mul_pd(__m256d __a, __m256d __b)
 {
   return __a * __b;
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_mul_ps(__m256 __a, __m256 __b)
 {
   return __a * __b;
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_sqrt_pd(__m256d __a)
 {
   return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_sqrt_ps(__m256 __a)
 {
   return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_rsqrt_ps(__m256 __a)
 {
   return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_rcp_ps(__m256 __a)
 {
   return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);
@@ -165,99 +165,99 @@ _mm256_rcp_ps(__m256 __a)
 #define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)
 
 /* Logical */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_and_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)((__v4di)__a & (__v4di)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_and_ps(__m256 __a, __m256 __b)
 {
   return (__m256)((__v8si)__a & (__v8si)__b);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_andnot_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)(~(__v4di)__a & (__v4di)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_andnot_ps(__m256 __a, __m256 __b)
 {
   return (__m256)(~(__v8si)__a & (__v8si)__b);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_or_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)((__v4di)__a | (__v4di)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_or_ps(__m256 __a, __m256 __b)
 {
   return (__m256)((__v8si)__a | (__v8si)__b);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_xor_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)((__v4di)__a ^ (__v4di)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_xor_ps(__m256 __a, __m256 __b)
 {
   return (__m256)((__v8si)__a ^ (__v8si)__b);
 }
 
 /* Horizontal arithmetic */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_hadd_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_hadd_ps(__m256 __a, __m256 __b)
 {
   return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_hsub_pd(__m256d __a, __m256d __b)
 {
   return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_hsub_ps(__m256 __a, __m256 __b)
 {
   return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);
 }
 
 /* Vector permutations */
-static __inline __m128d DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS
 _mm_permutevar_pd(__m128d __a, __m128i __c)
 {
   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_permutevar_pd(__m256d __a, __m256i __c)
 {
   return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
 }
 
-static __inline __m128 DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS
 _mm_permutevar_ps(__m128 __a, __m128i __c)
 {
   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_permutevar_ps(__m256 __a, __m256i __c)
 {
   return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
@@ -329,14 +329,14 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
                                   (((M) & 0x40) ? 14 : 6), \
                                   (((M) & 0x80) ? 15 : 7)); })
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
 {
   return (__m256d)__builtin_ia32_blendvpd256(
     (__v4df)__a, (__v4df)__b, (__v4df)__c);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 {
   return (__m256)__builtin_ia32_blendvps256(
@@ -432,21 +432,21 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
   __m128 __b = (b); \
   (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); })
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi32(__m256i __a, const int __imm)
 {
   __v8si __b = (__v8si)__a;
   return __b[__imm & 7];
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi16(__m256i __a, const int __imm)
 {
   __v16hi __b = (__v16hi)__a;
   return __b[__imm & 15];
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_extract_epi8(__m256i __a, const int __imm)
 {
   __v32qi __b = (__v32qi)__a;
@@ -454,7 +454,7 @@ _mm256_extract_epi8(__m256i __a, const int __imm)
 }
 
 #ifdef __x86_64__
-static __inline long long  DEFAULT_FN_ATTRS
+static __inline long long  __DEFAULT_FN_ATTRS
 _mm256_extract_epi64(__m256i __a, const int __imm)
 {
   __v4di __b = (__v4di)__a;
@@ -462,7 +462,7 @@ _mm256_extract_epi64(__m256i __a, const int __imm)
 }
 #endif
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
 {
   __v8si __c = (__v8si)__a;
@@ -470,7 +470,7 @@ _mm256_insert_epi32(__m256i __a, int __b, int const __imm)
   return (__m256i)__c;
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
 {
   __v16hi __c = (__v16hi)__a;
@@ -478,7 +478,7 @@ _mm256_insert_epi16(__m256i __a, int __b, int const __imm)
   return (__m256i)__c;
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
 {
   __v32qi __c = (__v32qi)__a;
@@ -487,7 +487,7 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
 }
 
 #ifdef __x86_64__
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
 {
   __v4di __c = (__v4di)__a;
@@ -497,263 +497,263 @@ _mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
 #endif
 
 /* Conversion */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_pd(__m128i __a)
 {
   return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_ps(__m256i __a)
 {
   return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a);
 }
 
-static __inline __m128 DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS
 _mm256_cvtpd_ps(__m256d __a)
 {
   return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtps_epi32(__m256 __a)
 {
   return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_cvtps_pd(__m128 __a)
 {
   return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a);
 }
 
-static __inline __m128i DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_cvttpd_epi32(__m256d __a)
 {
   return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
 }
 
-static __inline __m128i DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_cvtpd_epi32(__m256d __a)
 {
   return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_cvttps_epi32(__m256 __a)
 {
   return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);
 }
 
 /* Vector replicate */
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_movehdup_ps(__m256 __a)
 {
   return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_moveldup_ps(__m256 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_movedup_pd(__m256d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
 }
 
 /* Unpack and Interleave */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_unpackhi_pd(__m256d __a, __m256d __b)
 {
   return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_unpacklo_pd(__m256d __a, __m256d __b)
 {
   return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_unpackhi_ps(__m256 __a, __m256 __b)
 {
   return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_unpacklo_ps(__m256 __a, __m256 __b)
 {
   return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);
 }
 
 /* Bit Test */
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm_testz_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm_testc_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm_testnzc_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm_testz_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm_testc_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm_testnzc_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testz_pd(__m256d __a, __m256d __b)
 {
   return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testc_pd(__m256d __a, __m256d __b)
 {
   return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testnzc_pd(__m256d __a, __m256d __b)
 {
   return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testz_ps(__m256 __a, __m256 __b)
 {
   return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testc_ps(__m256 __a, __m256 __b)
 {
   return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testnzc_ps(__m256 __a, __m256 __b)
 {
   return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testz_si256(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testc_si256(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_testnzc_si256(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
 }
 
 /* Vector extract sign mask */
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_movemask_pd(__m256d __a)
 {
   return __builtin_ia32_movmskpd256((__v4df)__a);
 }
 
-static __inline int DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS
 _mm256_movemask_ps(__m256 __a)
 {
   return __builtin_ia32_movmskps256((__v8sf)__a);
 }
 
 /* Vector __zero */
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_zeroall(void)
 {
   __builtin_ia32_vzeroall();
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_zeroupper(void)
 {
   __builtin_ia32_vzeroupper();
 }
 
 /* Vector load with broadcast */
-static __inline __m128 DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS
 _mm_broadcast_ss(float const *__a)
 {
   float __f = *__a;
   return (__m128)(__v4sf){ __f, __f, __f, __f };
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_broadcast_sd(double const *__a)
 {
   double __d = *__a;
   return (__m256d)(__v4df){ __d, __d, __d, __d };
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_broadcast_ss(float const *__a)
 {
   float __f = *__a;
   return (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_broadcast_pd(__m128d const *__a)
 {
   return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_broadcast_ps(__m128 const *__a)
 {
   return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
 }
 
 /* SIMD load ops */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_load_pd(double const *__p)
 {
   return *(__m256d *)__p;
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_load_ps(float const *__p)
 {
   return *(__m256 *)__p;
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_loadu_pd(double const *__p)
 {
   struct __loadu_pd {
@@ -762,7 +762,7 @@ _mm256_loadu_pd(double const *__p)
   return ((struct __loadu_pd*)__p)->__v;
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_loadu_ps(float const *__p)
 {
   struct __loadu_ps {
@@ -771,13 +771,13 @@ _mm256_loadu_ps(float const *__p)
   return ((struct __loadu_ps*)__p)->__v;
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_load_si256(__m256i const *__p)
 {
   return *__p;
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_loadu_si256(__m256i const *__p)
 {
   struct __loadu_si256 {
@@ -786,141 +786,141 @@ _mm256_loadu_si256(__m256i const *__p)
   return ((struct __loadu_si256*)__p)->__v;
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_lddqu_si256(__m256i const *__p)
 {
   return (__m256i)__builtin_ia32_lddqu256((char const *)__p);
 }
 
 /* SIMD store ops */
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_store_pd(double *__p, __m256d __a)
 {
   *(__m256d *)__p = __a;
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_store_ps(float *__p, __m256 __a)
 {
   *(__m256 *)__p = __a;
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_storeu_pd(double *__p, __m256d __a)
 {
   __builtin_ia32_storeupd256(__p, (__v4df)__a);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_storeu_ps(float *__p, __m256 __a)
 {
   __builtin_ia32_storeups256(__p, (__v8sf)__a);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_store_si256(__m256i *__p, __m256i __a)
 {
   *__p = __a;
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_storeu_si256(__m256i *__p, __m256i __a)
 {
   __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a);
 }
 
 /* Conditional load ops */
-static __inline __m128d DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS
 _mm_maskload_pd(double const *__p, __m128d __m)
 {
   return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_maskload_pd(double const *__p, __m256d __m)
 {
   return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
                                                (__v4df)__m);
 }
 
-static __inline __m128 DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS
 _mm_maskload_ps(float const *__p, __m128 __m)
 {
   return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_maskload_ps(float const *__p, __m256 __m)
 {
   return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
 }
 
 /* Conditional store ops */
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
 {
   __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
 {
   __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
 {
   __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
 {
   __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
 }
 
 /* Cacheability support ops */
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_si256(__m256i *__a, __m256i __b)
 {
   __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_pd(double *__a, __m256d __b)
 {
   __builtin_ia32_movntpd256(__a, (__v4df)__b);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_stream_ps(float *__p, __m256 __a)
 {
   __builtin_ia32_movntps256(__p, (__v8sf)__a);
 }
 
 /* Create vectors */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_set_pd(double __a, double __b, double __c, double __d)
 {
   return (__m256d){ __d, __c, __b, __a };
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_set_ps(float __a, float __b, float __c, float __d,
               float __e, float __f, float __g, float __h)
 {
   return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
                  int __i4, int __i5, int __i6, int __i7)
 {
   return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
                  short __w11, short __w10, short __w09, short __w08,
                  short __w07, short __w06, short __w05, short __w04,
@@ -930,7 +930,7 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
     __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
                 char __b27, char __b26, char __b25, char __b24,
                 char __b23, char __b22, char __b21, char __b20,
@@ -948,34 +948,34 @@ _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
   };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)
 {
   return (__m256i)(__v4di){ __d, __c, __b, __a };
 }
 
 /* Create vectors with elements in reverse order */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_setr_pd(double __a, double __b, double __c, double __d)
 {
   return (__m256d){ __a, __b, __c, __d };
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_setr_ps(float __a, float __b, float __c, float __d,
                float __e, float __f, float __g, float __h)
 {
   return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
                   int __i4, int __i5, int __i6, int __i7)
 {
   return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
        short __w11, short __w10, short __w09, short __w08,
        short __w07, short __w06, short __w05, short __w04,
@@ -985,7 +985,7 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
     __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
                  char __b27, char __b26, char __b25, char __b24,
                  char __b23, char __b22, char __b21, char __b20,
@@ -1002,39 +1002,39 @@ _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
     __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
 {
   return (__m256i)(__v4di){ __a, __b, __c, __d };
 }
 
 /* Create vectors with repeated elements */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_set1_pd(double __w)
 {
   return (__m256d){ __w, __w, __w, __w };
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_set1_ps(float __w)
 {
   return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set1_epi32(int __i)
 {
   return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set1_epi16(short __w)
 {
   return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
     __w, __w, __w, __w, __w, __w };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set1_epi8(char __b)
 {
   return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
@@ -1042,99 +1042,99 @@ _mm256_set1_epi8(char __b)
     __b, __b, __b, __b, __b, __b, __b };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set1_epi64x(long long __q)
 {
   return (__m256i)(__v4di){ __q, __q, __q, __q };
 }
 
 /* Create __zeroed vectors */
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_setzero_pd(void)
 {
   return (__m256d){ 0, 0, 0, 0 };
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_setzero_ps(void)
 {
   return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_setzero_si256(void)
 {
   return (__m256i){ 0LL, 0LL, 0LL, 0LL };
 }
 
 /* Cast between vector types */
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_castpd_ps(__m256d __a)
 {
   return (__m256)__a;
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_castpd_si256(__m256d __a)
 {
   return (__m256i)__a;
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_castps_pd(__m256 __a)
 {
   return (__m256d)__a;
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_castps_si256(__m256 __a)
 {
   return (__m256i)__a;
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_castsi256_ps(__m256i __a)
 {
   return (__m256)__a;
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_castsi256_pd(__m256i __a)
 {
   return (__m256d)__a;
 }
 
-static __inline __m128d DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS
 _mm256_castpd256_pd128(__m256d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1);
 }
 
-static __inline __m128 DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS
 _mm256_castps256_ps128(__m256 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
 }
 
-static __inline __m128i DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS
 _mm256_castsi256_si128(__m256i __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_castpd128_pd256(__m128d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_castps128_ps256(__m128 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_castsi128_si256(__m128i __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
@@ -1205,7 +1205,7 @@ _mm256_castsi128_si256(__m128i __a)
     (((M) & 1) ? 3 : 1) );})
 
 /* SIMD load ops (unaligned) */
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
 {
   struct __loadu_ps {
@@ -1216,7 +1216,7 @@ _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
   return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
 {
   struct __loadu_pd {
@@ -1227,7 +1227,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
   return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
 {
   struct __loadu_si128 {
@@ -1240,7 +1240,7 @@ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
 }
 
 /* SIMD store ops (unaligned) */
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
 {
   __m128 __v128;
@@ -1251,7 +1251,7 @@ _mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
   __builtin_ia32_storeups(__addr_hi, __v128);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
 {
   __m128d __v128;
@@ -1262,7 +1262,7 @@ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
   __builtin_ia32_storeupd(__addr_hi, __v128);
 }
 
-static __inline void DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS
 _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
 {
   __m128i __v128;
@@ -1273,36 +1273,36 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
   __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_set_m128 (__m128 __hi, __m128 __lo) {
   return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_set_m128d (__m128d __hi, __m128d __lo) {
   return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_set_m128i (__m128i __hi, __m128i __lo) {
   return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_setr_m128 (__m128 __lo, __m128 __hi) {
   return _mm256_set_m128(__hi, __lo);
 }
 
-static __inline __m256d DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS
 _mm256_setr_m128d (__m128d __lo, __m128d __hi) {
   return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
 }
 
-static __inline __m256i DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS
 _mm256_setr_m128i (__m128i __lo, __m128i __hi) {
   return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __AVXINTRIN_H */
diff --git a/lib/Headers/bmi2intrin.h b/lib/Headers/bmi2intrin.h
index 7818934..fdae82c 100644
--- a/lib/Headers/bmi2intrin.h
+++ b/lib/Headers/bmi2intrin.h
@@ -29,21 +29,21 @@
 #define __BMI2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _bzhi_u32(unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_bzhi_si(__X, __Y);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _pdep_u32(unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_pdep_si(__X, __Y);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _pext_u32(unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_pext_si(__X, __Y);
@@ -51,25 +51,25 @@ _pext_u32(unsigned int __X, unsigned int __Y)
 
 #ifdef  __x86_64__
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _bzhi_u64(unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_bzhi_di(__X, __Y);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _pdep_u64(unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_pdep_di(__X, __Y);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _pext_u64(unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_pext_di(__X, __Y);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 	   unsigned long long *__P)
 {
@@ -80,7 +80,7 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 
 #else /* !__x86_64__ */
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
 {
   unsigned long long __res = (unsigned long long) __X * __Y;
@@ -90,6 +90,6 @@ _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
 
 #endif /* !__x86_64__  */
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __BMI2INTRIN_H */
diff --git a/lib/Headers/bmiintrin.h b/lib/Headers/bmiintrin.h
index 0aad8f2..dc2f83f 100644
--- a/lib/Headers/bmiintrin.h
+++ b/lib/Headers/bmiintrin.h
@@ -37,53 +37,53 @@
 #define _tzcnt_u32(a)     (__tzcnt_u32((a)))
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
 
-static __inline__ unsigned short DEFAULT_FN_ATTRS
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
 __tzcnt_u16(unsigned short __X)
 {
   return __X ? __builtin_ctzs(__X) : 16;
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __andn_u32(unsigned int __X, unsigned int __Y)
 {
   return ~__X & __Y;
 }
 
 /* AMD-specified, double-leading-underscore version of BEXTR */
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __bextr_u32(unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_bextr_u32(__X, __Y);
 }
 
 /* Intel-specified, single-leading-underscore version of BEXTR */
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
 {
   return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blsi_u32(unsigned int __X)
 {
   return __X & -__X;
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blsmsk_u32(unsigned int __X)
 {
   return __X ^ (__X - 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blsr_u32(unsigned int __X)
 {
   return __X & (__X - 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __tzcnt_u32(unsigned int __X)
 {
   return __X ? __builtin_ctz(__X) : 32;
@@ -98,45 +98,45 @@ __tzcnt_u32(unsigned int __X)
 #define _blsr_u64(a)      (__blsr_u64((a)))
 #define _tzcnt_u64(a)     (__tzcnt_u64((a)))
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __andn_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return ~__X & __Y;
 }
 
 /* AMD-specified, double-leading-underscore version of BEXTR */
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __bextr_u64(unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_bextr_u64(__X, __Y);
 }
 
 /* Intel-specified, single-leading-underscore version of BEXTR */
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
 {
   return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blsi_u64(unsigned long long __X)
 {
   return __X & -__X;
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blsmsk_u64(unsigned long long __X)
 {
   return __X ^ (__X - 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blsr_u64(unsigned long long __X)
 {
   return __X & (__X - 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __tzcnt_u64(unsigned long long __X)
 {
   return __X ? __builtin_ctzll(__X) : 64;
@@ -144,6 +144,6 @@ __tzcnt_u64(unsigned long long __X)
 
 #endif /* __x86_64__ */
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __BMIINTRIN_H */
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index eee2428..e22ffaf 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -36,435 +36,435 @@ typedef short __v8hi __attribute__((__vector_size__(16)));
 typedef char __v16qi __attribute__((__vector_size__(16)));
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_add_sd(__m128d __a, __m128d __b)
 {
   __a[0] += __b[0];
   return __a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_add_pd(__m128d __a, __m128d __b)
 {
   return __a + __b;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_sub_sd(__m128d __a, __m128d __b)
 {
   __a[0] -= __b[0];
   return __a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_sub_pd(__m128d __a, __m128d __b)
 {
   return __a - __b;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mul_sd(__m128d __a, __m128d __b)
 {
   __a[0] *= __b[0];
   return __a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mul_pd(__m128d __a, __m128d __b)
 {
   return __a * __b;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_div_sd(__m128d __a, __m128d __b)
 {
   __a[0] /= __b[0];
   return __a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_div_pd(__m128d __a, __m128d __b)
 {
   return __a / __b;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_sqrt_sd(__m128d __a, __m128d __b)
 {
   __m128d __c = __builtin_ia32_sqrtsd(__b);
   return (__m128d) { __c[0], __a[1] };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_sqrt_pd(__m128d __a)
 {
   return __builtin_ia32_sqrtpd(__a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_min_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_minsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_min_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_minpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_max_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_maxsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_max_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_maxpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_and_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)((__v4si)__a & (__v4si)__b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_andnot_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)(~(__v4si)__a & (__v4si)__b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_or_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)((__v4si)__a | (__v4si)__b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_xor_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)((__v4si)__a ^ (__v4si)__b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpeq_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmplt_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmple_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmplepd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpgt_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpge_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmplepd(__b, __a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpord_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpunord_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpneq_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpnle_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpngt_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpnge_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpeq_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmplt_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmple_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmplesd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpgt_sd(__m128d __a, __m128d __b)
 {
   __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpge_sd(__m128d __a, __m128d __b)
 {
   __m128d __c = __builtin_ia32_cmplesd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpord_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpunord_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpneq_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpnle_sd(__m128d __a, __m128d __b)
 {
   return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpngt_sd(__m128d __a, __m128d __b)
 {
   __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cmpnge_sd(__m128d __a, __m128d __b)
 {
   __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comieq_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_comisdeq(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comilt_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_comisdlt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comile_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_comisdle(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comigt_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_comisdgt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comige_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_comisdge(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comineq_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_comisdneq(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomieq_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_ucomisdeq(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomilt_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_ucomisdlt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomile_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_ucomisdle(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomigt_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_ucomisdgt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomige_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_ucomisdge(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomineq_sd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_ucomisdneq(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtpd_ps(__m128d __a)
 {
   return __builtin_ia32_cvtpd2ps(__a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtps_pd(__m128 __a)
 {
   return __builtin_ia32_cvtps2pd(__a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtepi32_pd(__m128i __a)
 {
   return __builtin_ia32_cvtdq2pd((__v4si)__a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtpd_epi32(__m128d __a)
 {
   return __builtin_ia32_cvtpd2dq(__a);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvtsd_si32(__m128d __a)
 {
   return __builtin_ia32_cvtsd2si(__a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtsd_ss(__m128 __a, __m128d __b)
 {
   __a[0] = __b[0];
   return __a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtsi32_sd(__m128d __a, int __b)
 {
   __a[0] = __b;
   return __a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtss_sd(__m128d __a, __m128 __b)
 {
   __a[0] = __b[0];
   return __a;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvttpd_epi32(__m128d __a)
 {
   return (__m128i)__builtin_ia32_cvttpd2dq(__a);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvttsd_si32(__m128d __a)
 {
   return __a[0];
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvtpd_pi32(__m128d __a)
 {
   return (__m64)__builtin_ia32_cvtpd2pi(__a);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvttpd_pi32(__m128d __a)
 {
   return (__m64)__builtin_ia32_cvttpd2pi(__a);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtpi32_pd(__m64 __a)
 {
   return __builtin_ia32_cvtpi2pd((__v2si)__a);
 }
 
-static __inline__ double DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS
 _mm_cvtsd_f64(__m128d __a)
 {
   return __a[0];
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_load_pd(double const *__dp)
 {
   return *(__m128d*)__dp;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_load1_pd(double const *__dp)
 {
   struct __mm_load1_pd_struct {
@@ -476,14 +476,14 @@ _mm_load1_pd(double const *__dp)
 
 #define        _mm_load_pd1(dp)        _mm_load1_pd(dp)
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_loadr_pd(double const *__dp)
 {
   __m128d __u = *(__m128d*)__dp;
   return __builtin_shufflevector(__u, __u, 1, 0);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_loadu_pd(double const *__dp)
 {
   struct __loadu_pd {
@@ -492,7 +492,7 @@ _mm_loadu_pd(double const *__dp)
   return ((struct __loadu_pd*)__dp)->__v;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_load_sd(double const *__dp)
 {
   struct __mm_load_sd_struct {
@@ -502,7 +502,7 @@ _mm_load_sd(double const *__dp)
   return (__m128d){ __u, 0 };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_loadh_pd(__m128d __a, double const *__dp)
 {
   struct __mm_loadh_pd_struct {
@@ -512,7 +512,7 @@ _mm_loadh_pd(__m128d __a, double const *__dp)
   return (__m128d){ __a[0], __u };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_loadl_pd(__m128d __a, double const *__dp)
 {
   struct __mm_loadl_pd_struct {
@@ -522,43 +522,43 @@ _mm_loadl_pd(__m128d __a, double const *__dp)
   return (__m128d){ __u, __a[1] };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_set_sd(double __w)
 {
   return (__m128d){ __w, 0 };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_set1_pd(double __w)
 {
   return (__m128d){ __w, __w };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_set_pd(double __w, double __x)
 {
   return (__m128d){ __x, __w };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_setr_pd(double __w, double __x)
 {
   return (__m128d){ __w, __x };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_setzero_pd(void)
 {
   return (__m128d){ 0, 0 };
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_move_sd(__m128d __a, __m128d __b)
 {
   return (__m128d){ __b[0], __a[1] };
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_sd(double *__dp, __m128d __a)
 {
   struct __mm_store_sd_struct {
@@ -567,7 +567,7 @@ _mm_store_sd(double *__dp, __m128d __a)
   ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
   struct __mm_store1_pd_struct {
@@ -577,26 +577,26 @@ _mm_store1_pd(double *__dp, __m128d __a)
   ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_pd(double *__dp, __m128d __a)
 {
   *(__m128d *)__dp = __a;
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storeu_pd(double *__dp, __m128d __a)
 {
   __builtin_ia32_storeupd(__dp, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storer_pd(double *__dp, __m128d __a)
 {
   __a = __builtin_shufflevector(__a, __a, 1, 0);
   *(__m128d *)__dp = __a;
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storeh_pd(double *__dp, __m128d __a)
 {
   struct __mm_storeh_pd_struct {
@@ -605,7 +605,7 @@ _mm_storeh_pd(double *__dp, __m128d __a)
   ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storel_pd(double *__dp, __m128d __a)
 {
   struct __mm_storeh_pd_struct {
@@ -614,211 +614,211 @@ _mm_storel_pd(double *__dp, __m128d __a)
   ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_add_epi8(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v16qi)__a + (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_add_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v8hi)__a + (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_add_epi32(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v4si)__a + (__v4si)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_add_si64(__m64 __a, __m64 __b)
 {
   return __a + __b;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_add_epi64(__m128i __a, __m128i __b)
 {
   return __a + __b;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epi8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epu8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epu16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_avg_epu8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_avg_epu16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_madd_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mulhi_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mulhi_epu16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mullo_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v8hi)__a * (__v8hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_mul_su32(__m64 __a, __m64 __b)
 {
   return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mul_epu32(__m128i __a, __m128i __b)
 {
   return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sad_epu8(__m128i __a, __m128i __b)
 {
   return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sub_epi8(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v16qi)__a - (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sub_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v8hi)__a - (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sub_epi32(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v4si)__a - (__v4si)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sub_si64(__m64 __a, __m64 __b)
 {
   return __a - __b;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sub_epi64(__m128i __a, __m128i __b)
 {
   return __a - __b;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epi8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epu8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epu16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_and_si128(__m128i __a, __m128i __b)
 {
   return __a & __b;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_andnot_si128(__m128i __a, __m128i __b)
 {
   return ~__a & __b;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_or_si128(__m128i __a, __m128i __b)
 {
   return __a | __b;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_xor_si128(__m128i __a, __m128i __b)
 {
   return __a ^ __b;
@@ -847,61 +847,61 @@ _mm_xor_si128(__m128i __a, __m128i __b)
 #define _mm_bslli_si128(a, imm) \
   _mm_slli_si128((a), (imm))
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_slli_epi16(__m128i __a, int __count)
 {
   return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sll_epi16(__m128i __a, __m128i __count)
 {
   return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_slli_epi32(__m128i __a, int __count)
 {
   return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sll_epi32(__m128i __a, __m128i __count)
 {
   return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_slli_epi64(__m128i __a, int __count)
 {
   return __builtin_ia32_psllqi128(__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sll_epi64(__m128i __a, __m128i __count)
 {
   return __builtin_ia32_psllq128(__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srai_epi16(__m128i __a, int __count)
 {
   return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sra_epi16(__m128i __a, __m128i __count)
 {
   return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srai_epi32(__m128i __a, int __count)
 {
   return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sra_epi32(__m128i __a, __m128i __count)
 {
   return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
@@ -930,61 +930,61 @@ _mm_sra_epi32(__m128i __a, __m128i __count)
 #define _mm_bsrli_si128(a, imm) \
   _mm_srli_si128((a), (imm))
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srli_epi16(__m128i __a, int __count)
 {
   return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srl_epi16(__m128i __a, __m128i __count)
 {
   return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srli_epi32(__m128i __a, int __count)
 {
   return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srl_epi32(__m128i __a, __m128i __count)
 {
   return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srli_epi64(__m128i __a, int __count)
 {
   return __builtin_ia32_psrlqi128(__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_srl_epi64(__m128i __a, __m128i __count)
 {
   return __builtin_ia32_psrlq128(__a, __count);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi8(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v16qi)__a == (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v8hi)__a == (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi32(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v4si)__a == (__v4si)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
 {
   /* This function always performs a signed comparison, but __v16qi is a char
@@ -993,90 +993,90 @@ _mm_cmpgt_epi8(__m128i __a, __m128i __b)
   return (__m128i)((__v16qs)__a > (__v16qs)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v8hi)__a > (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi32(__m128i __a, __m128i __b)
 {
   return (__m128i)((__v4si)__a > (__v4si)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmplt_epi8(__m128i __a, __m128i __b)
 {
   return _mm_cmpgt_epi8(__b, __a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmplt_epi16(__m128i __a, __m128i __b)
 {
   return _mm_cmpgt_epi16(__b, __a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmplt_epi32(__m128i __a, __m128i __b)
 {
   return _mm_cmpgt_epi32(__b, __a);
 }
 
 #ifdef __x86_64__
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtsi64_sd(__m128d __a, long long __b)
 {
   __a[0] = __b;
   return __a;
 }
 
-static __inline__ long long DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvtsd_si64(__m128d __a)
 {
   return __builtin_ia32_cvtsd2si64(__a);
 }
 
-static __inline__ long long DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvttsd_si64(__m128d __a)
 {
   return __a[0];
 }
 #endif
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtepi32_ps(__m128i __a)
 {
   return __builtin_ia32_cvtdq2ps((__v4si)__a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtps_epi32(__m128 __a)
 {
   return (__m128i)__builtin_ia32_cvtps2dq(__a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvttps_epi32(__m128 __a)
 {
   return (__m128i)__builtin_ia32_cvttps2dq(__a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtsi32_si128(int __a)
 {
   return (__m128i)(__v4si){ __a, 0, 0, 0 };
 }
 
 #ifdef __x86_64__
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtsi64_si128(long long __a)
 {
   return (__m128i){ __a, 0 };
 }
 #endif
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvtsi128_si32(__m128i __a)
 {
   __v4si __b = (__v4si)__a;
@@ -1084,20 +1084,20 @@ _mm_cvtsi128_si32(__m128i __a)
 }
 
 #ifdef __x86_64__
-static __inline__ long long DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvtsi128_si64(__m128i __a)
 {
   return __a[0];
 }
 #endif
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_load_si128(__m128i const *__p)
 {
   return *__p;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_loadu_si128(__m128i const *__p)
 {
   struct __loadu_si128 {
@@ -1106,7 +1106,7 @@ _mm_loadu_si128(__m128i const *__p)
   return ((struct __loadu_si128*)__p)->__v;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_loadl_epi64(__m128i const *__p)
 {
   struct __mm_loadl_epi64_struct {
@@ -1115,115 +1115,115 @@ _mm_loadl_epi64(__m128i const *__p)
   return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set_epi64x(long long q1, long long q0)
 {
   return (__m128i){ q0, q1 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set_epi64(__m64 q1, __m64 q0)
 {
   return (__m128i){ (long long)q0, (long long)q1 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set_epi32(int i3, int i2, int i1, int i0)
 {
   return (__m128i)(__v4si){ i0, i1, i2, i3};
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
 {
   return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
 {
   return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set1_epi64x(long long __q)
 {
   return (__m128i){ __q, __q };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set1_epi64(__m64 __q)
 {
   return (__m128i){ (long long)__q, (long long)__q };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set1_epi32(int __i)
 {
   return (__m128i)(__v4si){ __i, __i, __i, __i };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set1_epi16(short __w)
 {
   return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_set1_epi8(char __b)
 {
   return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_setr_epi64(__m64 q0, __m64 q1)
 {
   return (__m128i){ (long long)q0, (long long)q1 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_setr_epi32(int i0, int i1, int i2, int i3)
 {
   return (__m128i)(__v4si){ i0, i1, i2, i3};
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
 {
   return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
 {
   return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_setzero_si128(void)
 {
   return (__m128i){ 0LL, 0LL };
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_si128(__m128i *__p, __m128i __b)
 {
   *__p = __b;
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storeu_si128(__m128i *__p, __m128i __b)
 {
   __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
 {
   __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storel_epi64(__m128i *__p, __m128i __a)
 {
   struct __mm_storel_epi64_struct {
@@ -1232,76 +1232,76 @@ _mm_storel_epi64(__m128i *__p, __m128i __a)
   ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_pd(double *__p, __m128d __a)
 {
   __builtin_ia32_movntpd(__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_si128(__m128i *__p, __m128i __a)
 {
   __builtin_ia32_movntdq(__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_si32(int *__p, int __a)
 {
   __builtin_ia32_movnti(__p, __a);
 }
 
 #ifdef __x86_64__
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_si64(long long *__p, long long __a)
 {
   __builtin_ia32_movnti64(__p, __a);
 }
 #endif
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_clflush(void const *__p)
 {
   __builtin_ia32_clflush(__p);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_lfence(void)
 {
   __builtin_ia32_lfence();
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_mfence(void)
 {
   __builtin_ia32_mfence();
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_packs_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_packs_epi32(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_packus_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_extract_epi16(__m128i __a, int __imm)
 {
   __v8hi __b = (__v8hi)__a;
   return (unsigned short)__b[__imm & 7];
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_insert_epi16(__m128i __a, int __b, int __imm)
 {
   __v8hi __c = (__v8hi)__a;
@@ -1309,7 +1309,7 @@ _mm_insert_epi16(__m128i __a, int __b, int __imm)
   return (__m128i)__c;
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_movemask_epi8(__m128i __a)
 {
   return __builtin_ia32_pmovmskb128((__v16qi)__a);
@@ -1337,85 +1337,85 @@ _mm_movemask_epi8(__m128i __a)
                                    4 + (((imm) & 0x30) >> 4), \
                                    4 + (((imm) & 0xc0) >> 6)); })
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpackhi_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpackhi_epi32(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpackhi_epi64(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpacklo_epi8(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpacklo_epi16(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpacklo_epi32(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
 {
   return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_movepi64_pi64(__m128i __a)
 {
   return (__m64)__a[0];
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_movpi64_epi64(__m64 __a)
 {
   return (__m128i){ (long long)__a, 0 };
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_move_epi64(__m128i __a)
 {
   return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_unpackhi_pd(__m128d __a, __m128d __b)
 {
   return __builtin_shufflevector(__a, __b, 1, 2+1);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_unpacklo_pd(__m128d __a, __m128d __b)
 {
   return __builtin_shufflevector(__a, __b, 0, 2+0);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_movemask_pd(__m128d __a)
 {
   return __builtin_ia32_movmskpd(__a);
@@ -1425,49 +1425,49 @@ _mm_movemask_pd(__m128d __a)
   __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
                           (i) & 1, (((i) & 2) >> 1) + 2); })
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_castpd_ps(__m128d __a)
 {
   return (__m128)__a;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_castpd_si128(__m128d __a)
 {
   return (__m128i)__a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_castps_pd(__m128 __a)
 {
   return (__m128d)__a;
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_castps_si128(__m128 __a)
 {
   return (__m128i)__a;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_castsi128_ps(__m128i __a)
 {
   return (__m128)__a;
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_castsi128_pd(__m128i __a)
 {
   return (__m128d)__a;
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_pause(void)
 {
   __asm__ volatile ("pause");
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
 
diff --git a/lib/Headers/f16cintrin.h b/lib/Headers/f16cintrin.h
index c56960c..9349b78 100644
--- a/lib/Headers/f16cintrin.h
+++ b/lib/Headers/f16cintrin.h
@@ -32,7 +32,7 @@ typedef float __v8sf __attribute__ ((__vector_size__ (32)));
 typedef float __m256 __attribute__ ((__vector_size__ (32)));
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
 
 #define _mm_cvtps_ph(a, imm) __extension__ ({ \
   __m128 __a = (a); \
@@ -42,18 +42,18 @@ typedef float __m256 __attribute__ ((__vector_size__ (32)));
   __m256 __a = (a); \
  (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
 
-static __inline __m128 DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS
 _mm_cvtph_ps(__m128i __a)
 {
   return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
 }
 
-static __inline __m256 DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS
 _mm256_cvtph_ps(__m128i __a)
 {
   return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __F16CINTRIN_H */
diff --git a/lib/Headers/fma4intrin.h b/lib/Headers/fma4intrin.h
index 5268805..f117887 100644
--- a/lib/Headers/fma4intrin.h
+++ b/lib/Headers/fma4intrin.h
@@ -31,200 +31,200 @@
 #include <pmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __FMA4INTRIN_H */
diff --git a/lib/Headers/fmaintrin.h b/lib/Headers/fmaintrin.h
index 6f5d1b9..114a143 100644
--- a/lib/Headers/fmaintrin.h
+++ b/lib/Headers/fmaintrin.h
@@ -29,200 +29,200 @@
 #define __FMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __FMAINTRIN_H */
diff --git a/lib/Headers/fxsrintrin.h b/lib/Headers/fxsrintrin.h
new file mode 100644
index 0000000..2b3549c
--- /dev/null
+++ b/lib/Headers/fxsrintrin.h
@@ -0,0 +1,55 @@
+/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __FXSRINTRIN_H
+#define __FXSRINTRIN_H
+
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_fxsave(void *__p) {
+  return __builtin_ia32_fxsave(__p);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_fxsave64(void *__p) {
+  return __builtin_ia32_fxsave64(__p);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_fxrstor(void *__p) {
+  return __builtin_ia32_fxrstor(__p);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_fxrstor64(void *__p) {
+  return __builtin_ia32_fxrstor64(__p);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index 4af407d..604bc8c 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -56,6 +56,8 @@
 
 #include <avx512bwintrin.h>
 
+#include <avx512cdintrin.h>
+
 #include <avx512dqintrin.h>
 
 #include <avx512vlbwintrin.h>
@@ -140,6 +142,8 @@ _writegsbase_u64(unsigned long long __V)
 
 #include <shaintrin.h>
 
+#include <fxsrintrin.h>
+
 /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
  * whereas others are also available at all times. */
 #include <adxintrin.h>
diff --git a/lib/Headers/inttypes.h b/lib/Headers/inttypes.h
new file mode 100644
index 0000000..3d59d14
--- /dev/null
+++ b/lib/Headers/inttypes.h
@@ -0,0 +1,102 @@
+/*===---- inttypes.h - Standard header for integer printf macros ----------===*\
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+\*===----------------------------------------------------------------------===*/
+
+#ifndef __CLANG_INTTYPES_H
+#define __CLANG_INTTYPES_H
+
+#include_next <inttypes.h>
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+/* MSVC headers define int32_t as int, but PRIx32 as "lx" instead of "x".
+ * This triggers format warnings, so fix it up here. */
+#undef PRId32
+#undef PRIdLEAST32
+#undef PRIdFAST32
+#undef PRIi32
+#undef PRIiLEAST32
+#undef PRIiFAST32
+#undef PRIo32
+#undef PRIoLEAST32
+#undef PRIoFAST32
+#undef PRIu32
+#undef PRIuLEAST32
+#undef PRIuFAST32
+#undef PRIx32
+#undef PRIxLEAST32
+#undef PRIxFAST32
+#undef PRIX32
+#undef PRIXLEAST32
+#undef PRIXFAST32
+
+#undef SCNd32
+#undef SCNdLEAST32
+#undef SCNdFAST32
+#undef SCNi32
+#undef SCNiLEAST32
+#undef SCNiFAST32
+#undef SCNo32
+#undef SCNoLEAST32
+#undef SCNoFAST32
+#undef SCNu32
+#undef SCNuLEAST32
+#undef SCNuFAST32
+#undef SCNx32
+#undef SCNxLEAST32
+#undef SCNxFAST32
+
+#define PRId32 "d"
+#define PRIdLEAST32 "d"
+#define PRIdFAST32 "d"
+#define PRIi32 "i"
+#define PRIiLEAST32 "i"
+#define PRIiFAST32 "i"
+#define PRIo32 "o"
+#define PRIoLEAST32 "o"
+#define PRIoFAST32 "o"
+#define PRIu32 "u"
+#define PRIuLEAST32 "u"
+#define PRIuFAST32 "u"
+#define PRIx32 "x"
+#define PRIxLEAST32 "x"
+#define PRIxFAST32 "x"
+#define PRIX32 "X"
+#define PRIXLEAST32 "X"
+#define PRIXFAST32 "X"
+
+#define SCNd32 "d"
+#define SCNdLEAST32 "d"
+#define SCNdFAST32 "d"
+#define SCNi32 "i"
+#define SCNiLEAST32 "i"
+#define SCNiFAST32 "i"
+#define SCNo32 "o"
+#define SCNoLEAST32 "o"
+#define SCNoFAST32 "o"
+#define SCNu32 "u"
+#define SCNuLEAST32 "u"
+#define SCNuFAST32 "u"
+#define SCNx32 "x"
+#define SCNxLEAST32 "x"
+#define SCNxFAST32 "x"
+#endif
+
+#endif /* __CLANG_INTTYPES_H */
diff --git a/lib/Headers/lzcntintrin.h b/lib/Headers/lzcntintrin.h
index 41e61e9..4c00e42 100644
--- a/lib/Headers/lzcntintrin.h
+++ b/lib/Headers/lzcntintrin.h
@@ -29,40 +29,40 @@
 #define __LZCNTINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
 
-static __inline__ unsigned short DEFAULT_FN_ATTRS
+static __inline__ unsigned short __DEFAULT_FN_ATTRS
 __lzcnt16(unsigned short __X)
 {
   return __X ? __builtin_clzs(__X) : 16;
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __lzcnt32(unsigned int __X)
 {
   return __X ? __builtin_clz(__X) : 32;
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _lzcnt_u32(unsigned int __X)
 {
   return __X ? __builtin_clz(__X) : 32;
 }
 
 #ifdef __x86_64__
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __lzcnt64(unsigned long long __X)
 {
   return __X ? __builtin_clzll(__X) : 64;
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _lzcnt_u64(unsigned long long __X)
 {
   return __X ? __builtin_clzll(__X) : 64;
 }
 #endif
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __LZCNTINTRIN_H */
diff --git a/lib/Headers/mm3dnow.h b/lib/Headers/mm3dnow.h
index 70734e4..3218df8 100644
--- a/lib/Headers/mm3dnow.h
+++ b/lib/Headers/mm3dnow.h
@@ -30,138 +30,138 @@
 typedef float __v2sf __attribute__((__vector_size__(8)));
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _m_femms() {
   __builtin_ia32_femms();
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pavgusb(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pf2id(__m64 __m) {
   return (__m64)__builtin_ia32_pf2id((__v2sf)__m);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfacc(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfadd(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfcmpeq(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfcmpge(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfcmpgt(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfmax(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfmin(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfmul(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfrcp(__m64 __m) {
   return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfrcpit1(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfrcpit2(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfrsqrt(__m64 __m) {
   return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfrsqrtit1(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfsub(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfsubr(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pi2fd(__m64 __m) {
   return (__m64)__builtin_ia32_pi2fd((__v2si)__m);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pmulhrw(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pf2iw(__m64 __m) {
   return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfnacc(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pfpnacc(__m64 __m1, __m64 __m2) {
   return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pi2fw(__m64 __m) {
   return (__m64)__builtin_ia32_pi2fw((__v2si)__m);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pswapdsf(__m64 __m) {
   return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pswapdsi(__m64 __m) {
   return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif
diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h
index d1d729f..484b499 100644
--- a/lib/Headers/mmintrin.h
+++ b/lib/Headers/mmintrin.h
@@ -31,369 +31,369 @@ typedef short __v4hi __attribute__((__vector_size__(8)));
 typedef char __v8qi __attribute__((__vector_size__(8)));
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_empty(void)
 {
     __builtin_ia32_emms();
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvtsi32_si64(int __i)
 {
     return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvtsi64_si32(__m64 __m)
 {
     return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvtsi64_m64(long long __i)
 {
     return (__m64)__i;
 }
 
-static __inline__ long long DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvtm64_si64(__m64 __m)
 {
     return (long long)__m;
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_packs_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_packs_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_packs_pu16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_add_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_add_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_add_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_adds_pi8(__m64 __m1, __m64 __m2) 
 {
     return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_adds_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_adds_pu8(__m64 __m1, __m64 __m2) 
 {
     return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
 }
  
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_adds_pu16(__m64 __m1, __m64 __m2) 
 {
     return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sub_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
 }
  
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sub_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
 }
  
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sub_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_subs_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_subs_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_subs_pu8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
 }
  
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_subs_pu16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_madd_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
 }
  
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 
 {
     return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sll_pi16(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_slli_pi16(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sll_pi32(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_slli_pi32(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sll_si64(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_psllq(__m, __count);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_slli_si64(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_psllqi(__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sra_pi16(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srai_pi16(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sra_pi32(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srai_pi32(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srl_pi16(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srli_pi16(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srl_pi32(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);       
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srli_pi32(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srl_si64(__m64 __m, __m64 __count)
 {
     return (__m64)__builtin_ia32_psrlq(__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_srli_si64(__m64 __m, int __count)
 {
     return (__m64)__builtin_ia32_psrlqi(__m, __count);    
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_and_si64(__m64 __m1, __m64 __m2)
 {
     return __builtin_ia32_pand(__m1, __m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_andnot_si64(__m64 __m1, __m64 __m2)
 {
     return __builtin_ia32_pandn(__m1, __m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_or_si64(__m64 __m1, __m64 __m2)
 {
     return __builtin_ia32_por(__m1, __m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_xor_si64(__m64 __m1, __m64 __m2)
 {
     return __builtin_ia32_pxor(__m1, __m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
 {
     return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_setzero_si64(void)
 {
     return (__m64){ 0LL };
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_set_pi32(int __i1, int __i0)
 {
     return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
 {
     return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
             char __b1, char __b0)
 {
@@ -401,44 +401,44 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
                                                __b4, __b5, __b6, __b7);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_set1_pi32(int __i)
 {
     return _mm_set_pi32(__i, __i);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_set1_pi16(short __w)
 {
     return _mm_set_pi16(__w, __w, __w, __w);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_set1_pi8(char __b)
 {
     return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_setr_pi32(int __i0, int __i1)
 {
     return _mm_set_pi32(__i1, __i0);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
 {
     return _mm_set_pi16(__w3, __w2, __w1, __w0);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
              char __b6, char __b7)
 {
     return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 /* Aliases for compatibility. */
 #define _m_empty _mm_empty
diff --git a/lib/Headers/module.modulemap b/lib/Headers/module.modulemap
index 8fcb5bc..0fc70a8 100644
--- a/lib/Headers/module.modulemap
+++ b/lib/Headers/module.modulemap
@@ -32,142 +32,117 @@ module _Builtin_intrinsics [system] [extern_c] {
     }
 
     explicit module cpuid {
-      requires x86
       header "cpuid.h"
     }
 
     explicit module mmx {
-      requires mmx
       header "mmintrin.h"
     }
 
     explicit module f16c {
-      requires f16c
       header "f16cintrin.h"
     }
 
     explicit module sse {
-      requires sse
       export mmx
       export sse2 // note: for hackish <emmintrin.h> dependency
       header "xmmintrin.h"
     }
 
     explicit module sse2 {
-      requires sse2
       export sse
       header "emmintrin.h"
     }
 
     explicit module sse3 {
-      requires sse3
       export sse2
       header "pmmintrin.h"
     }
 
     explicit module ssse3 {
-      requires ssse3
       export sse3
       header "tmmintrin.h"
     }
 
     explicit module sse4_1 {
-      requires sse41
       export ssse3
       header "smmintrin.h"
     }
 
     explicit module sse4_2 {
-      requires sse42
       export sse4_1
       header "nmmintrin.h"
     }
 
     explicit module sse4a {
-      requires sse4a
       export sse3
       header "ammintrin.h"
     }
 
     explicit module avx {
-      requires avx
       export sse4_2
       header "avxintrin.h"
     }
 
     explicit module avx2 {
-      requires avx2
       export avx
       header "avx2intrin.h"
     }
 
     explicit module avx512f {
-      requires avx512f
       export avx2
       header "avx512fintrin.h"
     }
 
     explicit module avx512er {
-      requires avx512er
       header "avx512erintrin.h"
     }
 
     explicit module bmi {
-      requires bmi
       header "bmiintrin.h"
     }
 
     explicit module bmi2 {
-      requires bmi2
       header "bmi2intrin.h"
     }
 
     explicit module fma {
-      requires fma
       header "fmaintrin.h"
     }
 
     explicit module fma4 {
-      requires fma4
       export sse3
       header "fma4intrin.h"
     }
 
     explicit module lzcnt {
-      requires lzcnt
       header "lzcntintrin.h"
     }
 
     explicit module popcnt {
-      requires popcnt
       header "popcntintrin.h"
     }
 
     explicit module mm3dnow {
-      requires mm3dnow
       header "mm3dnow.h"
     }
 
     explicit module xop {
-      requires xop
       export fma4
       header "xopintrin.h"
     }
 
     explicit module aes_pclmul {
-      requires aes, pclmul
       header "wmmintrin.h"
       export aes
       export pclmul
     }
 
     explicit module aes {
-      requires aes
       header "__wmmintrin_aes.h"
     }
 
     explicit module pclmul {
-      requires pclmul
       header "__wmmintrin_pclmul.h"
     }
   }
diff --git a/lib/Headers/pmmintrin.h b/lib/Headers/pmmintrin.h
index 6e61539..e888b6f 100644
--- a/lib/Headers/pmmintrin.h
+++ b/lib/Headers/pmmintrin.h
@@ -27,57 +27,57 @@
 #include <emmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_lddqu_si128(__m128i const *__p)
 {
   return (__m128i)__builtin_ia32_lddqu((char const *)__p);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_addsub_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_addsubps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_hadd_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_haddps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_hsub_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_hsubps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_movehdup_ps(__m128 __a)
 {
   return __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_moveldup_ps(__m128 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_addsub_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_addsubpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_hadd_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_haddpd(__a, __b);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_hsub_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_hsubpd(__a, __b);
@@ -85,7 +85,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b)
 
 #define        _mm_loaddup_pd(dp)        _mm_load1_pd(dp)
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_movedup_pd(__m128d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 0);
@@ -99,18 +99,18 @@ _mm_movedup_pd(__m128d __a)
 #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
 #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
 {
   __builtin_ia32_monitor((void *)__p, __extensions, __hints);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_mwait(unsigned __extensions, unsigned __hints)
 {
   __builtin_ia32_mwait(__extensions, __hints);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __PMMINTRIN_H */
diff --git a/lib/Headers/popcntintrin.h b/lib/Headers/popcntintrin.h
index fede8da..29c074b 100644
--- a/lib/Headers/popcntintrin.h
+++ b/lib/Headers/popcntintrin.h
@@ -25,22 +25,22 @@
 #define _POPCNTINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt")))
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_popcnt_u32(unsigned int __A)
 {
   return __builtin_popcount(__A);
 }
 
 #ifdef __x86_64__
-static __inline__ long long DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_popcnt_u64(unsigned long long __A)
 {
   return __builtin_popcountll(__A);
 }
 #endif /* __x86_64__ */
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* _POPCNTINTRIN_H */
diff --git a/lib/Headers/rdseedintrin.h b/lib/Headers/rdseedintrin.h
index ac9ec4f..421f4ea 100644
--- a/lib/Headers/rdseedintrin.h
+++ b/lib/Headers/rdseedintrin.h
@@ -29,28 +29,28 @@
 #define __RDSEEDINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rdseed")))
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _rdseed16_step(unsigned short *__p)
 {
   return __builtin_ia32_rdseed16_step(__p);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _rdseed32_step(unsigned int *__p)
 {
   return __builtin_ia32_rdseed32_step(__p);
 }
 
 #ifdef __x86_64__
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _rdseed64_step(unsigned long long *__p)
 {
   return __builtin_ia32_rdseed64_step(__p);
 }
 #endif
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __RDSEEDINTRIN_H */
diff --git a/lib/Headers/rtmintrin.h b/lib/Headers/rtmintrin.h
index 8709a12..e6a58d7 100644
--- a/lib/Headers/rtmintrin.h
+++ b/lib/Headers/rtmintrin.h
@@ -38,15 +38,15 @@
 #define _XABORT_CODE(x)   (((x) >> 24) & 0xFF)
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("rtm")))
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _xbegin(void)
 {
   return __builtin_ia32_xbegin();
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _xend(void)
 {
   __builtin_ia32_xend();
@@ -54,6 +54,6 @@ _xend(void)
 
 #define _xabort(imm) __builtin_ia32_xabort((imm))
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __RTMINTRIN_H */
diff --git a/lib/Headers/shaintrin.h b/lib/Headers/shaintrin.h
index 4b74291..8602d02 100644
--- a/lib/Headers/shaintrin.h
+++ b/lib/Headers/shaintrin.h
@@ -29,47 +29,47 @@
 #define __SHAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
 
 #define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
   __builtin_ia32_sha1rnds4((V1), (V2), (M)); })
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha1msg1_epu32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha1msg2_epu32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)
 {
   return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha256msg1_epu32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha256msg2_epu32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __SHAINTRIN_H */
diff --git a/lib/Headers/smmintrin.h b/lib/Headers/smmintrin.h
index f2cc909..e197590 100644
--- a/lib/Headers/smmintrin.h
+++ b/lib/Headers/smmintrin.h
@@ -27,7 +27,7 @@
 #include <tmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
 
 /* SSE4 Rounding macros. */
 #define _MM_FROUND_TO_NEAREST_INT    0x00
@@ -91,21 +91,21 @@
                                   (((M) & 0x04) ? 6 : 2), \
                                   (((M) & 0x08) ? 7 : 3)); })
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
 {
   return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,
                                             (__v2df)__M);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
 {
   return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,
                                            (__v4sf)__M);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
 {
   return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,
@@ -126,13 +126,13 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
                                    (((M) & 0x80) ? 15 : 7)); })
 
 /* SSE4 Dword Multiply Instructions.  */
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_mullo_epi32 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) ((__v4si)__V1 * (__v4si)__V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_mul_epi32 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);
@@ -150,56 +150,56 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
   (__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); })
 
 /* SSE4 Streaming Load Hint Instruction.  */
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_stream_load_si128 (__m128i *__V)
 {
   return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V);
 }
 
 /* SSE4 Packed Integer Min/Max Instructions.  */
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi8 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi8 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu16 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu16 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi32 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi32 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu32 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
 }
 
-static __inline__  __m128i DEFAULT_FN_ATTRS
+static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu32 (__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
@@ -253,19 +253,19 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
 #endif /* __x86_64 */
 
 /* SSE4 128-bit Packed Integer Comparisons.  */
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_testz_si128(__m128i __M, __m128i __V)
 {
   return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_testc_si128(__m128i __M, __m128i __V)
 {
   return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_testnzc_si128(__m128i __M, __m128i __V)
 {
   return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
@@ -276,88 +276,88 @@ _mm_testnzc_si128(__m128i __M, __m128i __V)
 #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
 
 /* SSE4 64-bit Packed Integer Comparisons.  */
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
 {
   return (__m128i)((__v2di)__V1 == (__v2di)__V2);
 }
 
 /* SSE4 Packed Integer Sign-Extension.  */
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi16(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi32(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi8_epi64(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi16_epi32(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V); 
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi16_epi64(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepi32_epi64(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V);
 }
 
 /* SSE4 Packed Integer Zero-Extension.  */
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi16(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi32(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi64(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi32(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi64(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu32_epi64(__m128i __V)
 {
   return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
 }
 
 /* SSE4 Pack with Unsigned Saturation.  */
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_packus_epi32(__m128i __V1, __m128i __V2)
 {
   return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
@@ -369,7 +369,7 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
   __m128i __Y = (Y); \
   (__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); })
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_minpos_epu16(__m128i __V)
 {
   return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);
@@ -380,8 +380,8 @@ _mm_minpos_epu16(__m128i __V)
 /* These definitions are normally in nmmintrin.h, but gcc puts them in here
    so we'll do the same.  */
 
-#undef DEFAULT_FN_ATTRS
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
+#undef __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
 
 /* These specify the type of data that we're comparing.  */
 #define _SIDD_UBYTE_OPS                 0x00
@@ -442,40 +442,40 @@ _mm_minpos_epu16(__m128i __V)
      __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M))
 
 /* SSE4.2 Compare Packed Data -- Greater Than.  */
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
 {
   return (__m128i)((__v2di)__V1 > (__v2di)__V2);
 }
 
 /* SSE4.2 Accumulate CRC32.  */
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _mm_crc32_u8(unsigned int __C, unsigned char __D)
 {
   return __builtin_ia32_crc32qi(__C, __D);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _mm_crc32_u16(unsigned int __C, unsigned short __D)
 {
   return __builtin_ia32_crc32hi(__C, __D);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _mm_crc32_u32(unsigned int __C, unsigned int __D)
 {
   return __builtin_ia32_crc32si(__C, __D);
 }
 
 #ifdef __x86_64__
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _mm_crc32_u64(unsigned long long __C, unsigned long long __D)
 {
   return __builtin_ia32_crc32di(__C, __D);
 }
 #endif /* __x86_64__ */
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #ifdef __POPCNT__
 #include <popcntintrin.h>
diff --git a/lib/Headers/tbmintrin.h b/lib/Headers/tbmintrin.h
index 1926df9..62f613f 100644
--- a/lib/Headers/tbmintrin.h
+++ b/lib/Headers/tbmintrin.h
@@ -29,59 +29,59 @@
 #define __TBMINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm")))
 
 #define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b)))
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blcfill_u32(unsigned int a)
 {
   return a & (a + 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blci_u32(unsigned int a)
 {
   return a | ~(a + 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blcic_u32(unsigned int a)
 {
   return ~a & (a + 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blcmsk_u32(unsigned int a)
 {
   return a ^ (a + 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blcs_u32(unsigned int a)
 {
   return a | (a + 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blsfill_u32(unsigned int a)
 {
   return a | (a - 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __blsic_u32(unsigned int a)
 {
   return ~a | (a - 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __t1mskc_u32(unsigned int a)
 {
   return ~a | (a + 1);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 __tzmsk_u32(unsigned int a)
 {
   return ~a & (a - 1);
@@ -90,61 +90,61 @@ __tzmsk_u32(unsigned int a)
 #ifdef __x86_64__
 #define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b)))
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blcfill_u64(unsigned long long a)
 {
   return a & (a + 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blci_u64(unsigned long long a)
 {
   return a | ~(a + 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blcic_u64(unsigned long long a)
 {
   return ~a & (a + 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blcmsk_u64(unsigned long long a)
 {
   return a ^ (a + 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blcs_u64(unsigned long long a)
 {
   return a | (a + 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blsfill_u64(unsigned long long a)
 {
   return a | (a - 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __blsic_u64(unsigned long long a)
 {
   return ~a | (a - 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __t1mskc_u64(unsigned long long a)
 {
   return ~a | (a + 1);
 }
 
-static __inline__ unsigned long long DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 __tzmsk_u64(unsigned long long a)
 {
   return ~a & (a - 1);
 }
 #endif
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __TBMINTRIN_H */
diff --git a/lib/Headers/tmmintrin.h b/lib/Headers/tmmintrin.h
index 2474c94..120d73c 100644
--- a/lib/Headers/tmmintrin.h
+++ b/lib/Headers/tmmintrin.h
@@ -27,39 +27,39 @@
 #include <pmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_abs_pi8(__m64 __a)
 {
     return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi8(__m128i __a)
 {
     return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_abs_pi16(__m64 __a)
 {
     return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi16(__m128i __a)
 {
     return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_abs_pi32(__m64 __a)
 {
     return (__m64)__builtin_ia32_pabsd((__v2si)__a);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi32(__m128i __a)
 {
     return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
@@ -75,150 +75,150 @@ _mm_abs_epi32(__m128i __a)
   __m64 __b = (b); \
   (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hadd_epi16(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hadd_epi32(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_hadd_pi16(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_hadd_pi32(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hadds_epi16(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_hadds_pi16(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hsub_epi16(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hsub_epi32(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_hsub_pi16(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_hsub_pi32(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hsubs_epi16(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_hsubs_pi16(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maddubs_epi16(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_maddubs_pi16(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_shuffle_epi8(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_shuffle_pi8(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sign_epi8(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sign_epi16(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sign_epi32(__m128i __a, __m128i __b)
 {
     return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sign_pi8(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sign_pi16(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sign_pi32(__m64 __a, __m64 __b)
 {
     return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __TMMINTRIN_H */
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index 0085eb4..18aa8c1 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -37,184 +37,184 @@ typedef float __m128 __attribute__((__vector_size__(16)));
 #endif
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse")))
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_add_ss(__m128 __a, __m128 __b)
 {
   __a[0] += __b[0];
   return __a;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_add_ps(__m128 __a, __m128 __b)
 {
   return __a + __b;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_sub_ss(__m128 __a, __m128 __b)
 {
   __a[0] -= __b[0];
   return __a;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_sub_ps(__m128 __a, __m128 __b)
 {
   return __a - __b;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mul_ss(__m128 __a, __m128 __b)
 {
   __a[0] *= __b[0];
   return __a;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mul_ps(__m128 __a, __m128 __b)
 {
   return __a * __b;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_div_ss(__m128 __a, __m128 __b)
 {
   __a[0] /= __b[0];
   return __a;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_div_ps(__m128 __a, __m128 __b)
 {
   return __a / __b;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_sqrt_ss(__m128 __a)
 {
   __m128 __c = __builtin_ia32_sqrtss(__a);
   return (__m128) { __c[0], __a[1], __a[2], __a[3] };
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_sqrt_ps(__m128 __a)
 {
   return __builtin_ia32_sqrtps(__a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rcp_ss(__m128 __a)
 {
   __m128 __c = __builtin_ia32_rcpss(__a);
   return (__m128) { __c[0], __a[1], __a[2], __a[3] };
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rcp_ps(__m128 __a)
 {
   return __builtin_ia32_rcpps(__a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rsqrt_ss(__m128 __a)
 {
   __m128 __c = __builtin_ia32_rsqrtss(__a);
   return (__m128) { __c[0], __a[1], __a[2], __a[3] };
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rsqrt_ps(__m128 __a)
 {
   return __builtin_ia32_rsqrtps(__a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_min_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_minss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_min_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_minps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_max_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_maxss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_max_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_maxps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_and_ps(__m128 __a, __m128 __b)
 {
   return (__m128)((__v4si)__a & (__v4si)__b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_andnot_ps(__m128 __a, __m128 __b)
 {
   return (__m128)(~(__v4si)__a & (__v4si)__b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_or_ps(__m128 __a, __m128 __b)
 {
   return (__m128)((__v4si)__a | (__v4si)__b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_xor_ps(__m128 __a, __m128 __b)
 {
   return (__m128)((__v4si)__a ^ (__v4si)__b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpeq_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpeqss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpeq_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpeqps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmplt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpltss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmplt_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpltps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmple_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpless(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmple_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpleps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpgt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
@@ -222,13 +222,13 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b)
                                          4, 1, 2, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpgt_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpltps(__b, __a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpge_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
@@ -236,49 +236,49 @@ _mm_cmpge_ss(__m128 __a, __m128 __b)
                                          4, 1, 2, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpge_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpleps(__b, __a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpneq_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpneqss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpneq_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpneqps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpnlt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpnltss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpnlt_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpnltps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpnle_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpnless(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpnle_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpnleps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpngt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
@@ -286,13 +286,13 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b)
                                          4, 1, 2, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpngt_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpnltps(__b, __a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpnge_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
@@ -300,115 +300,115 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b)
                                          4, 1, 2, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpnge_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpnleps(__b, __a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpord_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpordss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpord_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpordps(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpunord_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpunordss(__a, __b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cmpunord_ps(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_ia32_cmpunordps(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comieq_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_comieq(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comilt_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_comilt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comile_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_comile(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comigt_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_comigt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comige_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_comige(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_comineq_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_comineq(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomieq_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_ucomieq(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomilt_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_ucomilt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomile_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_ucomile(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomigt_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_ucomigt(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomige_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_ucomige(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_ucomineq_ss(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_ucomineq(__a, __b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvtss_si32(__m128 __a)
 {
   return __builtin_ia32_cvtss2si(__a);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvt_ss2si(__m128 __a)
 {
   return _mm_cvtss_si32(__a);
@@ -416,7 +416,7 @@ _mm_cvt_ss2si(__m128 __a)
 
 #ifdef __x86_64__
 
-static __inline__ long long DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvtss_si64(__m128 __a)
 {
   return __builtin_ia32_cvtss2si64(__a);
@@ -424,56 +424,56 @@ _mm_cvtss_si64(__m128 __a)
 
 #endif
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvtps_pi32(__m128 __a)
 {
   return (__m64)__builtin_ia32_cvtps2pi(__a);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvt_ps2pi(__m128 __a)
 {
   return _mm_cvtps_pi32(__a);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvttss_si32(__m128 __a)
 {
   return __a[0];
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_cvtt_ss2si(__m128 __a)
 {
   return _mm_cvttss_si32(__a);
 }
 
-static __inline__ long long DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS
 _mm_cvttss_si64(__m128 __a)
 {
   return __a[0];
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvttps_pi32(__m128 __a)
 {
   return (__m64)__builtin_ia32_cvttps2pi(__a);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvtt_ps2pi(__m128 __a)
 {
   return _mm_cvttps_pi32(__a);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtsi32_ss(__m128 __a, int __b)
 {
   __a[0] = __b;
   return __a;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvt_si2ss(__m128 __a, int __b)
 {
   return _mm_cvtsi32_ss(__a, __b);
@@ -481,7 +481,7 @@ _mm_cvt_si2ss(__m128 __a, int __b)
 
 #ifdef __x86_64__
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtsi64_ss(__m128 __a, long long __b)
 {
   __a[0] = __b;
@@ -490,25 +490,25 @@ _mm_cvtsi64_ss(__m128 __a, long long __b)
 
 #endif
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtpi32_ps(__m128 __a, __m64 __b)
 {
   return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvt_pi2ps(__m128 __a, __m64 __b)
 {
   return _mm_cvtpi32_ps(__a, __b);
 }
 
-static __inline__ float DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS
 _mm_cvtss_f32(__m128 __a)
 {
   return __a[0];
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_loadh_pi(__m128 __a, const __m64 *__p)
 {
   typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));
@@ -520,7 +520,7 @@ _mm_loadh_pi(__m128 __a, const __m64 *__p)
   return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_loadl_pi(__m128 __a, const __m64 *__p)
 {
   typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));
@@ -532,7 +532,7 @@ _mm_loadl_pi(__m128 __a, const __m64 *__p)
   return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_load_ss(const float *__p)
 {
   struct __mm_load_ss_struct {
@@ -542,7 +542,7 @@ _mm_load_ss(const float *__p)
   return (__m128){ __u, 0, 0, 0 };
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_load1_ps(const float *__p)
 {
   struct __mm_load1_ps_struct {
@@ -554,13 +554,13 @@ _mm_load1_ps(const float *__p)
 
 #define        _mm_load_ps1(p) _mm_load1_ps(p)
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_load_ps(const float *__p)
 {
   return *(__m128*)__p;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_loadu_ps(const float *__p)
 {
   struct __loadu_ps {
@@ -569,63 +569,63 @@ _mm_loadu_ps(const float *__p)
   return ((struct __loadu_ps*)__p)->__v;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_loadr_ps(const float *__p)
 {
   __m128 __a = _mm_load_ps(__p);
   return __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_set_ss(float __w)
 {
   return (__m128){ __w, 0, 0, 0 };
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_set1_ps(float __w)
 {
   return (__m128){ __w, __w, __w, __w };
 }
 
 /* Microsoft specific. */
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_set_ps1(float __w)
 {
     return _mm_set1_ps(__w);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_set_ps(float __z, float __y, float __x, float __w)
 {
   return (__m128){ __w, __x, __y, __z };
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_setr_ps(float __z, float __y, float __x, float __w)
 {
   return (__m128){ __z, __y, __x, __w };
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_setzero_ps(void)
 {
   return (__m128){ 0, 0, 0, 0 };
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storeh_pi(__m64 *__p, __m128 __a)
 {
   __builtin_ia32_storehps((__v2si *)__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storel_pi(__m64 *__p, __m128 __a)
 {
   __builtin_ia32_storelps((__v2si *)__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_ss(float *__p, __m128 __a)
 {
   struct __mm_store_ss_struct {
@@ -634,32 +634,32 @@ _mm_store_ss(float *__p, __m128 __a)
   ((struct __mm_store_ss_struct*)__p)->__u = __a[0];
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storeu_ps(float *__p, __m128 __a)
 {
   __builtin_ia32_storeups(__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_ps(float *__p, __m128 __a)
 {
   __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0);
   _mm_storeu_ps(__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_ps1(float *__p, __m128 __a)
 {
     return _mm_store1_ps(__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_ps(float *__p, __m128 __a)
 {
   *(__m128 *)__p = __a;
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_storer_ps(float *__p, __m128 __a)
 {
   __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0);
@@ -678,32 +678,32 @@ _mm_storer_ps(float *__p, __m128 __a)
 #define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
 #endif
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_pi(__m64 *__p, __m64 __a)
 {
   __builtin_ia32_movntq(__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_stream_ps(float *__p, __m128 __a)
 {
   __builtin_ia32_movntps(__p, __a);
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_sfence(void)
 {
   __builtin_ia32_sfence();
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_extract_pi16(__m64 __a, int __n)
 {
   __v4hi __b = (__v4hi)__a;
   return (unsigned short)__b[__n & 3];
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_insert_pi16(__m64 __a, int __d, int __n)
 {
    __v4hi __b = (__v4hi)__a;
@@ -711,37 +711,37 @@ _mm_insert_pi16(__m64 __a, int __d, int __n)
    return (__m64)__b;
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_max_pi16(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_max_pu8(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_min_pi16(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_min_pu8(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_movemask_pi8(__m64 __a)
 {
   return __builtin_ia32_pmovmskb((__v8qi)__a);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_mulhi_pu16(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
@@ -751,37 +751,37 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
   __m64 __a = (a); \
   (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); })
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
 {
   __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_avg_pu8(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_avg_pu16(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_sad_pu8(__m64 __a, __m64 __b)
 {
   return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
 }
 
-static __inline__ unsigned int DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
 _mm_getcsr(void)
 {
   return __builtin_ia32_stmxcsr();
 }
 
-static __inline__ void DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_setcsr(unsigned int __i)
 {
   __builtin_ia32_ldmxcsr(__i);
@@ -795,37 +795,37 @@ _mm_setcsr(unsigned int __i)
                                   (((mask) & 0x30) >> 4) + 4, \
                                   (((mask) & 0xc0) >> 6) + 4); })
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_unpackhi_ps(__m128 __a, __m128 __b)
 {
   return __builtin_shufflevector(__a, __b, 2, 6, 3, 7);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_unpacklo_ps(__m128 __a, __m128 __b)
 {
   return __builtin_shufflevector(__a, __b, 0, 4, 1, 5);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_move_ss(__m128 __a, __m128 __b)
 {
   return __builtin_shufflevector(__a, __b, 4, 1, 2, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_movehl_ps(__m128 __a, __m128 __b)
 {
   return __builtin_shufflevector(__a, __b, 6, 7, 2, 3);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_movelh_ps(__m128 __a, __m128 __b)
 {
   return __builtin_shufflevector(__a, __b, 0, 1, 4, 5);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtpi16_ps(__m64 __a)
 {
   __m64 __b, __c;
@@ -843,7 +843,7 @@ _mm_cvtpi16_ps(__m64 __a)
   return __r;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtpu16_ps(__m64 __a)
 {
   __m64 __b, __c;
@@ -860,7 +860,7 @@ _mm_cvtpu16_ps(__m64 __a)
   return __r;
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtpi8_ps(__m64 __a)
 {
   __m64 __b;
@@ -872,7 +872,7 @@ _mm_cvtpi8_ps(__m64 __a)
   return _mm_cvtpi16_ps(__b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtpu8_ps(__m64 __a)
 {
   __m64 __b;
@@ -883,7 +883,7 @@ _mm_cvtpu8_ps(__m64 __a)
   return _mm_cvtpi16_ps(__b);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
 {
   __m128 __c;
@@ -895,7 +895,7 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
   return _mm_cvtpi32_ps(__c, __a);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvtps_pi16(__m128 __a)
 {
   __m64 __b, __c;
@@ -907,7 +907,7 @@ _mm_cvtps_pi16(__m128 __a)
   return _mm_packs_pi32(__b, __c);
 }
 
-static __inline__ __m64 DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS
 _mm_cvtps_pi8(__m128 __a)
 {
   __m64 __b, __c;
@@ -918,7 +918,7 @@ _mm_cvtps_pi8(__m128 __a)
   return _mm_packs_pi16(__b, __c);
 }
 
-static __inline__ int DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm_movemask_ps(__m128 __a)
 {
   return __builtin_ia32_movmskps(__a);
@@ -992,7 +992,7 @@ do { \
 #define _m_ _mm_
 #define _m_ _mm_
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 /* Ugly hack for backwards-compatibility (compatible with gcc) */
 #if defined(__SSE2__) && !__has_feature(modules)
diff --git a/lib/Headers/xopintrin.h b/lib/Headers/xopintrin.h
index 8417d78..86188bb 100644
--- a/lib/Headers/xopintrin.h
+++ b/lib/Headers/xopintrin.h
@@ -31,207 +31,207 @@
 #include <fma4intrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddw_epi8(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddd_epi8(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddq_epi8(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddd_epi16(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddq_epi16(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddq_epi32(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddw_epu8(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddd_epu8(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddq_epu8(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddd_epu16(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddq_epu16(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_haddq_epu32(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hsubw_epi8(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hsubd_epi16(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_hsubq_epi32(__m128i __A)
 {
   return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
 }
 
-static __inline__ __m256i DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
 {
   return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi8(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi32(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi64(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
@@ -253,49 +253,49 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
   __m128i __A = (A); \
   (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); })
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_shl_epi8(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_shl_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_shl_epi32(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_shl_epi64(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha_epi8(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha_epi32(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_sha_epi64(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
@@ -350,385 +350,385 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
 #define _MM_PCOMCTRL_FALSE 6
 #define _MM_PCOMCTRL_TRUE  7
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epu8(__m128i __A, __m128i __B)
 {
   return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epu16(__m128i __A, __m128i __B)
 {
   return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epu32(__m128i __A, __m128i __B)
 {
   return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epu64(__m128i __A, __m128i __B)
 {
   return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epi8(__m128i __A, __m128i __B)
 {
   return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epi16(__m128i __A, __m128i __B)
 {
   return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epi32(__m128i __A, __m128i __B)
 {
   return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comlt_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comle_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comgt_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comge_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comeq_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comneq_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comfalse_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
 }
 
-static __inline__ __m128i DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_comtrue_epi64(__m128i __A, __m128i __B)
 {
   return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
@@ -762,42 +762,42 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B)
   (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \
                                        (__v8si)__C, (I)); })
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_frcz_ss(__m128 __A)
 {
   return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_frcz_sd(__m128d __A)
 {
   return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
 }
 
-static __inline__ __m128 DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_frcz_ps(__m128 __A)
 {
   return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
 }
 
-static __inline__ __m128d DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_frcz_pd(__m128d __A)
 {
   return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
 }
 
-static __inline__ __m256 DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_frcz_ps(__m256 __A)
 {
   return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
 }
 
-static __inline__ __m256d DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_frcz_pd(__m256d __A)
 {
   return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
 }
 
-#undef DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __XOPINTRIN_H */
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
index 7a98f54..6c5c64b 100644
--- a/lib/Lex/HeaderSearch.cpp
+++ b/lib/Lex/HeaderSearch.cpp
@@ -14,6 +14,7 @@
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Lex/ExternalPreprocessorSource.h"
 #include "clang/Lex/HeaderMap.h"
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/LexDiagnostic.h"
@@ -33,9 +34,13 @@
 using namespace clang;
 
 const IdentifierInfo *
-HeaderFileInfo::getControllingMacro(ExternalIdentifierLookup *External) {
-  if (ControllingMacro)
+HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) {
+  if (ControllingMacro) {
+    if (ControllingMacro->isOutOfDate())
+      External->updateOutOfDateIdentifier(
+          *const_cast<IdentifierInfo *>(ControllingMacro));
     return ControllingMacro;
+  }
 
   if (!ControllingMacroID || !External)
     return nullptr;
@@ -527,9 +532,13 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
       // Load this framework module. If that succeeds, find the suggested module
       // for this header, if any.
       bool IsSystem = getDirCharacteristic() != SrcMgr::C_User;
-      if (HS.loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem)) {
-        *SuggestedModule = HS.findModuleForHeader(FE);
-      }
+      HS.loadFrameworkModule(ModuleName, TopFrameworkDir, IsSystem);
+
+      // FIXME: This can find a module not part of ModuleName, which is
+      // important so that we're consistent about whether this header
+      // corresponds to a module. Possibly we should lock down framework modules
+      // so that this is not possible.
+      *SuggestedModule = HS.findModuleForHeader(FE);
     } else {
       *SuggestedModule = HS.findModuleForHeader(FE);
     }
@@ -1025,7 +1034,7 @@ void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE,
 
 bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
                                           const FileEntry *File,
-                                          bool isImport) {
+                                          bool isImport, Module *M) {
   ++NumIncluded; // Count # of attempted #includes.
 
   // Get information about this file.
@@ -1050,7 +1059,11 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
   // if the macro that guards it is defined, we know the #include has no effect.
   if (const IdentifierInfo *ControllingMacro
       = FileInfo.getControllingMacro(ExternalLookup))
-    if (PP.isMacroDefined(ControllingMacro)) {
+    // If the include file is part of a module, and we already know what its
+    // controlling macro is, then we've already parsed it and can safely just
+    // make it visible. This saves us needing to switch into the visibility
+    // state of the module just to check whether the macro is defined within it.
+    if (M || PP.isMacroDefined(ControllingMacro)) {
       ++NumMultiIncludeFileOptzn;
       return false;
     }
@@ -1229,6 +1242,9 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name,
   // Try to load a module map file.
   switch (loadModuleMapFile(Dir, IsSystem, /*IsFramework*/true)) {
   case LMM_InvalidModuleMap:
+    // Try to infer a module map from the framework directory.
+    if (HSOpts->ImplicitModuleMaps)
+      ModMap.inferFrameworkModule(Dir, IsSystem, /*Parent=*/nullptr);
     break;
 
   case LMM_AlreadyLoaded:
@@ -1236,15 +1252,10 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name,
     return nullptr;
 
   case LMM_NewlyLoaded:
-    return ModMap.findModule(Name);
+    break;
   }
 
-
-  // Try to infer a module map from the framework directory.
-  if (HSOpts->ImplicitModuleMaps)
-    return ModMap.inferFrameworkModule(Name, Dir, IsSystem, /*Parent=*/nullptr);
-
-  return nullptr;
+  return ModMap.findModule(Name);
 }
 
 
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index 6c98d01..e6fe389 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -346,6 +346,9 @@ ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File) {
     ModuleMap::KnownHeader Result;
     // Iterate over all modules that 'File' is part of to find the best fit.
     for (KnownHeader &H : Known->second) {
+      // Prefer a header from the current module over all others.
+      if (H.getModule() == CompilingModule)
+        return MakeResult(H);
       // Cannot use a module if it is unavailable.
       if (!H.getModule()->isAvailable())
         continue;
@@ -580,19 +583,28 @@ static void inferFrameworkLink(Module *Mod, const DirectoryEntry *FrameworkDir,
   }
 }
 
-Module *
-ModuleMap::inferFrameworkModule(StringRef ModuleName,
-                                const DirectoryEntry *FrameworkDir,
-                                bool IsSystem,
-                                Module *Parent) {
+Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
+                                        bool IsSystem, Module *Parent) {
   Attributes Attrs;
   Attrs.IsSystem = IsSystem;
-  return inferFrameworkModule(ModuleName, FrameworkDir, Attrs, Parent);
+  return inferFrameworkModule(FrameworkDir, Attrs, Parent);
 }
 
-Module *ModuleMap::inferFrameworkModule(StringRef ModuleName,
-                                        const DirectoryEntry *FrameworkDir,
+Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
                                         Attributes Attrs, Module *Parent) {
+  // Note: as an egregious but useful hack we use the real path here, because
+  // we might be looking at an embedded framework that symlinks out to a
+  // top-level framework, and we need to infer as if we were naming the
+  // top-level framework.
+  StringRef FrameworkDirName =
+      SourceMgr.getFileManager().getCanonicalName(FrameworkDir);
+
+  // In case this is a case-insensitive filesystem, use the canonical
+  // directory name as the ModuleName, since modules are case-sensitive.
+  // FIXME: we should be able to give a fix-it hint for the correct spelling.
+  SmallString<32> ModuleNameStorage;
+  StringRef ModuleName = sanitizeFilenameAsIdentifier(
+      llvm::sys::path::stem(FrameworkDirName), ModuleNameStorage);
 
   // Check whether we've already found this module.
   if (Module *Mod = lookupModuleQualified(ModuleName, Parent))
@@ -605,20 +617,6 @@ Module *ModuleMap::inferFrameworkModule(StringRef ModuleName,
   const FileEntry *ModuleMapFile = nullptr;
   if (!Parent) {
     // Determine whether we're allowed to infer a module map.
-
-    // Note: as an egregious but useful hack we use the real path here, because
-    // we might be looking at an embedded framework that symlinks out to a
-    // top-level framework, and we need to infer as if we were naming the
-    // top-level framework.
-    StringRef FrameworkDirName
-      = SourceMgr.getFileManager().getCanonicalName(FrameworkDir);
-
-    // In case this is a case-insensitive filesystem, make sure the canonical
-    // directory name matches ModuleName exactly. Modules are case-sensitive.
-    // FIXME: we should be able to give a fix-it hint for the correct spelling.
-    if (llvm::sys::path::stem(FrameworkDirName) != ModuleName)
-      return nullptr;
-
     bool canInfer = false;
     if (llvm::sys::path::has_parent_path(FrameworkDirName)) {
       // Figure out the parent path.
@@ -744,10 +742,7 @@ Module *ModuleMap::inferFrameworkModule(StringRef ModuleName,
         continue;
 
       // FIXME: Do we want to warn about subframeworks without umbrella headers?
-      SmallString<32> NameBuf;
-      inferFrameworkModule(sanitizeFilenameAsIdentifier(
-                               llvm::sys::path::stem(Dir->path()), NameBuf),
-                           SubframeworkDir, Attrs, Result);
+      inferFrameworkModule(SubframeworkDir, Attrs, Result);
     }
   }
 
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 33ce799..ce64538 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -1749,7 +1749,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
   // Ask HeaderInfo if we should enter this #include file.  If not, #including
   // this file will have no effect.
   if (ShouldEnter &&
-      !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport)) {
+      !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
+                                         SuggestedModule.getModule())) {
     ShouldEnter = false;
     if (Callbacks)
       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index 1a35d32..c231e18 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -644,11 +644,20 @@ void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc) {
   if (FirstTime) {
     // Determine the set of starting macros for this submodule; take these
     // from the "null" module (the predefines buffer).
+    //
+    // FIXME: If we have local visibility but not modules enabled, the
+    // NullSubmoduleState is polluted by #defines in the top-level source
+    // file.
     auto &StartingMacros = NullSubmoduleState.Macros;
 
     // Restore to the starting state.
     // FIXME: Do this lazily, when each macro name is first referenced.
     for (auto &Macro : StartingMacros) {
+      // Skip uninteresting macros.
+      if (!Macro.second.getLatest() &&
+          Macro.second.getOverriddenMacros().empty())
+        continue;
+
       MacroState MS(Macro.second.getLatest());
       MS.setOverriddenMacros(*this, Macro.second.getOverriddenMacros());
       State.Macros.insert(std::make_pair(Macro.first, std::move(MS)));
@@ -732,6 +741,11 @@ void Preprocessor::LeaveSubmodule() {
     }
   }
 
+  // FIXME: Before we leave this submodule, we should parse all the other
+  // headers within it. Otherwise, we're left with an inconsistent state
+  // where we've made the module visible but don't yet have its complete
+  // contents.
+
   // Put back the outer module's state, if we're tracking it.
   if (getLangOpts().ModulesLocalVisibility)
     CurSubmoduleState = Info.OuterSubmoduleState;
@@ -739,6 +753,5 @@ void Preprocessor::LeaveSubmodule() {
   BuildingSubmoduleStack.pop_back();
 
   // A nested #include makes the included submodule visible.
-  if (!BuildingSubmoduleStack.empty() || !getLangOpts().ModulesLocalVisibility)
-    makeModuleVisible(LeavingMod, ImportLoc);
+  makeModuleVisible(LeavingMod, ImportLoc);
 }
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 5e0b283..d52519e 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -1052,7 +1052,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
       .Case("address_sanitizer",
             LangOpts.Sanitize.hasOneOf(SanitizerKind::Address |
                                        SanitizerKind::KernelAddress))
-      .Case("assume_nonnull", LangOpts.ObjC1 || LangOpts.GNUMode)
+      .Case("assume_nonnull", true)
       .Case("attribute_analyzer_noreturn", true)
       .Case("attribute_availability", true)
       .Case("attribute_availability_with_message", true)
@@ -1077,7 +1077,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
       .Case("cxx_exceptions", LangOpts.CXXExceptions)
       .Case("cxx_rtti", LangOpts.RTTI)
       .Case("enumerator_attributes", true)
-      .Case("nullability", LangOpts.ObjC1 || LangOpts.GNUMode)
+      .Case("nullability", true)
       .Case("memory_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Memory))
       .Case("thread_sanitizer", LangOpts.Sanitize.has(SanitizerKind::Thread))
       .Case("dataflow_sanitizer", LangOpts.Sanitize.has(SanitizerKind::DataFlow))
@@ -1102,6 +1102,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) {
       .Case("objc_array_literals", LangOpts.ObjC2)
       .Case("objc_dictionary_literals", LangOpts.ObjC2)
       .Case("objc_boxed_expressions", LangOpts.ObjC2)
+      .Case("objc_boxed_nsvalue_expressions", LangOpts.ObjC2)
       .Case("arc_cf_code_audited", true)
       .Case("objc_bridge_id", true)
       .Case("objc_bridge_id_on_typedefs", true)
@@ -1225,7 +1226,6 @@ static bool HasExtension(const Preprocessor &PP, const IdentifierInfo *II) {
   // Because we inherit the feature list from HasFeature, this string switch
   // must be less restrictive than HasFeature's.
   return llvm::StringSwitch<bool>(Extension)
-           .Case("nullability", true)
            // C11 features supported by other languages as extensions.
            .Case("c_alignas", true)
            .Case("c_alignof", true)
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 7e33f1c..e2db638 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -286,6 +286,10 @@ Preprocessor::macro_begin(bool IncludeExternalMacros) const {
     ExternalSource->ReadDefinedMacros();
   }
 
+  // Make sure we cover all macros in visible modules.
+  for (const ModuleMacro &Macro : ModuleMacros)
+    CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
+
   return CurSubmoduleState->Macros.begin();
 }
 
diff --git a/lib/Parse/ParseCXXInlineMethods.cpp b/lib/Parse/ParseCXXInlineMethods.cpp
index ea67a52..ab1f97d 100644
--- a/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/lib/Parse/ParseCXXInlineMethods.cpp
@@ -27,7 +27,7 @@ NamedDecl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
                                       ParsingDeclarator &D,
                                       const ParsedTemplateInfo &TemplateInfo,
                                       const VirtSpecifiers& VS,
-                                      ExprResult& Init) {
+                                      SourceLocation PureSpecLoc) {
   assert(D.isFunctionDeclarator() && "This isn't a function declarator!");
   assert(Tok.isOneOf(tok::l_brace, tok::colon, tok::kw_try, tok::equal) &&
          "Current token not a '{', ':', '=', or 'try'!");
@@ -47,12 +47,8 @@ NamedDecl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
                                            VS, ICIS_NoInit);
     if (FnD) {
       Actions.ProcessDeclAttributeList(getCurScope(), FnD, AccessAttrs);
-      bool TypeSpecContainsAuto = D.getDeclSpec().containsPlaceholderType();
-      if (Init.isUsable())
-        Actions.AddInitializerToDecl(FnD, Init.get(), false,
-                                     TypeSpecContainsAuto);
-      else
-        Actions.ActOnUninitializedDecl(FnD, TypeSpecContainsAuto);
+      if (PureSpecLoc.isValid())
+        Actions.ActOnPureSpecifier(FnD, PureSpecLoc);
     }
   }
 
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 1c52552..d843e80 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -691,9 +691,9 @@ void Parser::ParseNullabilityTypeSpecifiers(ParsedAttributes &attrs) {
   // Treat these like attributes, even though they're type specifiers.
   while (true) {
     switch (Tok.getKind()) {
-    case tok::kw___nonnull:
-    case tok::kw___nullable:
-    case tok::kw___null_unspecified: {
+    case tok::kw__Nonnull:
+    case tok::kw__Nullable:
+    case tok::kw__Null_unspecified: {
       IdentifierInfo *AttrName = Tok.getIdentifierInfo();
       SourceLocation AttrNameLoc = ConsumeToken();
       if (!getLangOpts().ObjC1)
@@ -2173,7 +2173,7 @@ void Parser::ParseSpecifierQualifierList(DeclSpec &DS, AccessSpecifier AS,
   }
 
   // Issue diagnostic and remove constexpr specfier if present.
-  if (DS.isConstexprSpecified()) {
+  if (DS.isConstexprSpecified() && DSC != DSC_condition) {
     Diag(DS.getConstexprSpecLoc(), diag::err_typename_invalid_constexpr);
     DS.ClearConstexprSpec();
   }
@@ -3076,9 +3076,9 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
       continue;
 
     // Nullability type specifiers.
-    case tok::kw___nonnull:
-    case tok::kw___nullable:
-    case tok::kw___null_unspecified:
+    case tok::kw__Nonnull:
+    case tok::kw__Nullable:
+    case tok::kw__Null_unspecified:
       ParseNullabilityTypeSpecifiers(DS.getAttributes());
       continue;
 
@@ -3192,6 +3192,11 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
       isInvalid = DS.SetConstexprSpec(Loc, PrevSpec, DiagID);
       break;
 
+    // concept
+    case tok::kw_concept:
+      isInvalid = DS.SetConceptSpec(Loc, PrevSpec, DiagID);
+      break;
+
     // type-specifier
     case tok::kw_short:
       isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_short, Loc, PrevSpec,
@@ -4326,9 +4331,9 @@ bool Parser::isTypeSpecifierQualifier() {
   case tok::kw___pascal:
   case tok::kw___unaligned:
 
-  case tok::kw___nonnull:
-  case tok::kw___nullable:
-  case tok::kw___null_unspecified:
+  case tok::kw__Nonnull:
+  case tok::kw__Nullable:
+  case tok::kw__Null_unspecified:
 
   case tok::kw___private:
   case tok::kw___local:
@@ -4475,6 +4480,9 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
   case tok::annot_decltype:
   case tok::kw_constexpr:
 
+    // C++ Concepts TS - concept
+  case tok::kw_concept:
+
     // C11 _Atomic
   case tok::kw__Atomic:
     return true;
@@ -4503,9 +4511,9 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
   case tok::kw___pascal:
   case tok::kw___unaligned:
 
-  case tok::kw___nonnull:
-  case tok::kw___nullable:
-  case tok::kw___null_unspecified:
+  case tok::kw__Nonnull:
+  case tok::kw__Nullable:
+  case tok::kw__Null_unspecified:
 
   case tok::kw___private:
   case tok::kw___local:
@@ -4738,9 +4746,9 @@ void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, unsigned AttrReqs,
       goto DoneWithTypeQuals;
 
     // Nullability type specifiers.
-    case tok::kw___nonnull:
-    case tok::kw___nullable:
-    case tok::kw___null_unspecified:
+    case tok::kw__Nonnull:
+    case tok::kw__Nullable:
+    case tok::kw__Null_unspecified:
       ParseNullabilityTypeSpecifiers(DS.getAttributes());
       continue;
 
diff --git a/lib/Parse/ParseDeclCXX.cpp b/lib/Parse/ParseDeclCXX.cpp
index 47778b8..568896d 100644
--- a/lib/Parse/ParseDeclCXX.cpp
+++ b/lib/Parse/ParseDeclCXX.cpp
@@ -2372,8 +2372,28 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
   LateParsedAttrList LateParsedAttrs;
 
   SourceLocation EqualLoc;
-  bool HasInitializer = false;
-  ExprResult Init;
+  SourceLocation PureSpecLoc;
+
+  auto TryConsumePureSpecifier = [&] (bool AllowDefinition) {
+    if (Tok.isNot(tok::equal))
+      return false;
+
+    auto &Zero = NextToken();
+    SmallString<8> Buffer;
+    if (Zero.isNot(tok::numeric_constant) || Zero.getLength() != 1 ||
+        PP.getSpelling(Zero, Buffer) != "0")
+      return false;
+
+    auto &After = GetLookAheadToken(2);
+    if (!After.isOneOf(tok::semi, tok::comma) &&
+        !(AllowDefinition &&
+          After.isOneOf(tok::l_brace, tok::colon, tok::kw_try)))
+      return false;
+
+    EqualLoc = ConsumeToken();
+    PureSpecLoc = ConsumeToken();
+    return true;
+  };
 
   SmallVector<Decl *, 8> DeclsInGroup;
   ExprResult BitfieldSize;
@@ -2390,16 +2410,8 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
   if (BitfieldSize.isUnset()) {
     // MSVC permits pure specifier on inline functions defined at class scope.
     // Hence check for =0 before checking for function definition.
-    if (getLangOpts().MicrosoftExt && Tok.is(tok::equal) &&
-        DeclaratorInfo.isFunctionDeclarator() &&
-        NextToken().is(tok::numeric_constant)) {
-      EqualLoc = ConsumeToken();
-      Init = ParseInitializer();
-      if (Init.isInvalid())
-        SkipUntil(tok::comma, StopAtSemi | StopBeforeMatch);
-      else
-        HasInitializer = true;
-    }
+    if (getLangOpts().MicrosoftExt && DeclaratorInfo.isDeclarationOfFunction())
+      TryConsumePureSpecifier(/*AllowDefinition*/ true);
 
     FunctionDefinitionKind DefinitionKind = FDK_Declaration;
     // function-definition:
@@ -2453,7 +2465,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
 
       Decl *FunDecl =
         ParseCXXInlineMethodDef(AS, AccessAttrs, DeclaratorInfo, TemplateInfo,
-                                VS, Init);
+                                VS, PureSpecLoc);
 
       if (FunDecl) {
         for (unsigned i = 0, ni = CommonLateParsedAttrs.size(); i < ni; ++i) {
@@ -2479,16 +2491,25 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
 
   while (1) {
     InClassInitStyle HasInClassInit = ICIS_NoInit;
-    if (Tok.isOneOf(tok::equal, tok::l_brace) && !HasInitializer) {
+    bool HasStaticInitializer = false;
+    if (Tok.isOneOf(tok::equal, tok::l_brace) && PureSpecLoc.isInvalid()) {
       if (BitfieldSize.get()) {
         Diag(Tok, diag::err_bitfield_member_init);
         SkipUntil(tok::comma, StopAtSemi | StopBeforeMatch);
+      } else if (DeclaratorInfo.isDeclarationOfFunction()) {
+        // It's a pure-specifier.
+        if (!TryConsumePureSpecifier(/*AllowFunctionDefinition*/ false))
+          // Parse it as an expression so that Sema can diagnose it.
+          HasStaticInitializer = true;
+      } else if (DeclaratorInfo.getDeclSpec().getStorageClassSpec() !=
+                     DeclSpec::SCS_static &&
+                 DeclaratorInfo.getDeclSpec().getStorageClassSpec() !=
+                     DeclSpec::SCS_typedef &&
+                 !DS.isFriendSpecified()) {
+        // It's a default member initializer.
+        HasInClassInit = Tok.is(tok::equal) ? ICIS_CopyInit : ICIS_ListInit;
       } else {
-        HasInitializer = true;
-        if (!DeclaratorInfo.isDeclarationOfFunction() &&
-            DeclaratorInfo.getDeclSpec().getStorageClassSpec()
-              != DeclSpec::SCS_typedef)
-          HasInClassInit = Tok.is(tok::equal) ? ICIS_CopyInit : ICIS_ListInit;
+        HasStaticInitializer = true;
       }
     }
 
@@ -2528,10 +2549,20 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
         Actions.ProcessDeclAttributeList(getCurScope(), ThisDecl, AccessAttrs);
     }
 
-    // Handle the initializer.
+    // Error recovery might have converted a non-static member into a static
+    // member.
     if (HasInClassInit != ICIS_NoInit &&
-        DeclaratorInfo.getDeclSpec().getStorageClassSpec() !=
-        DeclSpec::SCS_static) {
+        DeclaratorInfo.getDeclSpec().getStorageClassSpec() ==
+            DeclSpec::SCS_static) {
+      HasInClassInit = ICIS_NoInit;
+      HasStaticInitializer = true;
+    }
+
+    if (ThisDecl && PureSpecLoc.isValid())
+      Actions.ActOnPureSpecifier(ThisDecl, PureSpecLoc);
+
+    // Handle the initializer.
+    if (HasInClassInit != ICIS_NoInit) {
       // The initializer was deferred; parse it and cache the tokens.
       Diag(Tok, getLangOpts().CPlusPlus11
                     ? diag::warn_cxx98_compat_nonstatic_member_init
@@ -2551,11 +2582,10 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
           ThisDecl->setInvalidDecl();
       } else
         ParseCXXNonStaticMemberInitializer(ThisDecl);
-    } else if (HasInitializer) {
+    } else if (HasStaticInitializer) {
       // Normal initializer.
-      if (!Init.isUsable())
-        Init = ParseCXXMemberInitializer(
-            ThisDecl, DeclaratorInfo.isDeclarationOfFunction(), EqualLoc);
+      ExprResult Init = ParseCXXMemberInitializer(
+          ThisDecl, DeclaratorInfo.isDeclarationOfFunction(), EqualLoc);
 
       if (Init.isInvalid())
         SkipUntil(tok::comma, StopAtSemi | StopBeforeMatch);
@@ -2608,8 +2638,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
     DeclaratorInfo.clear();
     VS.clear();
     BitfieldSize = ExprResult(/*Invalid=*/false);
-    Init = ExprResult(/*Invalid=*/false);
-    HasInitializer = false;
+    EqualLoc = PureSpecLoc = SourceLocation();
     DeclaratorInfo.setCommaLoc(CommaLoc);
 
     // GNU attributes are allowed before the second and subsequent declarator.
@@ -2632,13 +2661,11 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
   Actions.FinalizeDeclaratorGroup(getCurScope(), DS, DeclsInGroup);
 }
 
-/// ParseCXXMemberInitializer - Parse the brace-or-equal-initializer or
-/// pure-specifier. Also detect and reject any attempted defaulted/deleted
-/// function definition. The location of the '=', if any, will be placed in
-/// EqualLoc.
+/// ParseCXXMemberInitializer - Parse the brace-or-equal-initializer.
+/// Also detect and reject any attempted defaulted/deleted function definition.
+/// The location of the '=', if any, will be placed in EqualLoc.
 ///
-///   pure-specifier:
-///     '= 0'
+/// This does not check for a pure-specifier; that's handled elsewhere.
 ///
 ///   brace-or-equal-initializer:
 ///     '=' initializer-expression
@@ -2742,6 +2769,11 @@ void Parser::SkipCXXMemberSpecification(SourceLocation RecordLoc,
   BalancedDelimiterTracker T(*this, tok::l_brace);
   T.consumeOpen();
   T.skipToEnd();
+
+  // Parse and discard any trailing attributes.
+  ParsedAttributes Attrs(AttrFactory);
+  if (Tok.is(tok::kw___attribute))
+    MaybeParseGNUAttributes(Attrs);
 }
 
 /// ParseCXXMemberSpecification - Parse the class definition.
@@ -3317,13 +3349,16 @@ Parser::tryParseExceptionSpecification(bool Delayed,
     T.consumeOpen();
     NoexceptType = EST_ComputedNoexcept;
     NoexceptExpr = ParseConstantExpression();
+    T.consumeClose();
     // The argument must be contextually convertible to bool. We use
     // ActOnBooleanCondition for this purpose.
-    if (!NoexceptExpr.isInvalid())
+    if (!NoexceptExpr.isInvalid()) {
       NoexceptExpr = Actions.ActOnBooleanCondition(getCurScope(), KeywordLoc,
                                                    NoexceptExpr.get());
-    T.consumeClose();
-    NoexceptRange = SourceRange(KeywordLoc, T.getCloseLocation());
+      NoexceptRange = SourceRange(KeywordLoc, T.getCloseLocation());
+    } else {
+      NoexceptType = EST_None;
+    }
   } else {
     // There is no argument.
     NoexceptType = EST_BasicNoexcept;
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index da759c7..b866798 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -205,6 +205,24 @@ ExprResult Parser::ParseConstantExpression(TypeCastState isTypeCast) {
   return Actions.ActOnConstantExpression(Res);
 }
 
+/// \brief Parse a constraint-expression.
+///
+/// \verbatim
+///       constraint-expression: [Concepts TS temp.constr.decl p1]
+///         logical-or-expression
+/// \endverbatim
+ExprResult Parser::ParseConstraintExpression() {
+  // FIXME: this may erroneously consume a function-body as the braced
+  // initializer list of a compound literal
+  //
+  // FIXME: this may erroneously consume a parenthesized rvalue reference
+  // declarator as a parenthesized address-of-label expression
+  ExprResult LHS(ParseCastExpression(/*isUnaryExpression=*/false));
+  ExprResult Res(ParseRHSOfBinaryExpression(LHS, prec::LogicalOr));
+
+  return Res;
+}
+
 bool Parser::isNotExpressionStart() {
   tok::TokenKind K = Tok.getKind();
   if (K == tok::l_brace || K == tok::r_brace  ||
@@ -1042,6 +1060,8 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
   case tok::kw_sizeof:     // unary-expression: 'sizeof' unary-expression
                            // unary-expression: 'sizeof' '(' type-name ')'
   case tok::kw_vec_step:   // unary-expression: OpenCL 'vec_step' expression
+  // unary-expression: '__builtin_omp_required_simd_align' '(' type-name ')'
+  case tok::kw___builtin_omp_required_simd_align:
     return ParseUnaryExprOrTypeTraitExpression();
   case tok::ampamp: {      // unary-expression: '&&' identifier
     SourceLocation AmpAmpLoc = ConsumeToken();
@@ -1636,8 +1656,9 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
                                            ParsedType &CastTy,
                                            SourceRange &CastRange) {
 
-  assert(OpTok.isOneOf(tok::kw_typeof,  tok::kw_sizeof,   tok::kw___alignof,
-                       tok::kw_alignof, tok::kw__Alignof, tok::kw_vec_step) &&
+  assert(OpTok.isOneOf(tok::kw_typeof, tok::kw_sizeof, tok::kw___alignof,
+                       tok::kw_alignof, tok::kw__Alignof, tok::kw_vec_step,
+                       tok::kw___builtin_omp_required_simd_align) &&
          "Not a typeof/sizeof/alignof/vec_step expression!");
 
   ExprResult Operand;
@@ -1722,7 +1743,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
 /// \endverbatim
 ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
   assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
-                     tok::kw__Alignof, tok::kw_vec_step) &&
+                     tok::kw__Alignof, tok::kw_vec_step,
+                     tok::kw___builtin_omp_required_simd_align) &&
          "Not a sizeof/alignof/vec_step expression!");
   Token OpTok = Tok;
   ConsumeToken();
@@ -1792,6 +1814,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
     ExprKind = UETT_AlignOf;
   else if (OpTok.is(tok::kw_vec_step))
     ExprKind = UETT_VecStep;
+  else if (OpTok.is(tok::kw___builtin_omp_required_simd_align))
+    ExprKind = UETT_OpenMPRequiredSimdAlign;
 
   if (isCastExpr)
     return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index 7fb4b5e..02176c4 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -1686,7 +1686,7 @@ bool Parser::ParseCXXCondition(ExprResult &ExprOut,
   // type-specifier-seq
   DeclSpec DS(AttrFactory);
   DS.takeAttributesFrom(attrs);
-  ParseSpecifierQualifierList(DS);
+  ParseSpecifierQualifierList(DS, AS_none, DSC_condition);
 
   // declarator
   Declarator DeclaratorInfo(DS, Declarator::ConditionContext);
diff --git a/lib/Parse/ParseObjc.cpp b/lib/Parse/ParseObjc.cpp
index e4f7911..f975f87 100644
--- a/lib/Parse/ParseObjc.cpp
+++ b/lib/Parse/ParseObjc.cpp
@@ -557,14 +557,14 @@ static void diagnoseRedundantPropertyNullability(Parser &P,
                                                  SourceLocation nullabilityLoc){
   if (DS.getNullability() == nullability) {
     P.Diag(nullabilityLoc, diag::warn_nullability_duplicate)
-      << static_cast<unsigned>(nullability) << true
+      << DiagNullabilityKind(nullability, true)
       << SourceRange(DS.getNullabilityLoc());
     return;
   }
 
   P.Diag(nullabilityLoc, diag::err_nullability_conflicting)
-    << static_cast<unsigned>(nullability) << true
-    << static_cast<unsigned>(DS.getNullability()) << true
+    << DiagNullabilityKind(nullability, true)
+    << DiagNullabilityKind(DS.getNullability(), true)
     << SourceRange(DS.getNullabilityLoc());
 }
 
diff --git a/lib/Parse/ParseOpenMP.cpp b/lib/Parse/ParseOpenMP.cpp
index a3c3c5d..0113a31 100644
--- a/lib/Parse/ParseOpenMP.cpp
+++ b/lib/Parse/ParseOpenMP.cpp
@@ -30,24 +30,39 @@ static OpenMPDirectiveKind ParseOpenMPDirectiveKind(Parser &P) {
   // E.g.: OMPD_for OMPD_simd ===> OMPD_for_simd
   // TODO: add other combined directives in topological order.
   const OpenMPDirectiveKind F[][3] = {
-    { OMPD_for, OMPD_simd, OMPD_for_simd },
-    { OMPD_parallel, OMPD_for, OMPD_parallel_for },
-    { OMPD_parallel_for, OMPD_simd, OMPD_parallel_for_simd },
-    { OMPD_parallel, OMPD_sections, OMPD_parallel_sections }
-  };
+      {OMPD_unknown /*cancellation*/, OMPD_unknown /*point*/,
+       OMPD_cancellation_point},
+      {OMPD_for, OMPD_simd, OMPD_for_simd},
+      {OMPD_parallel, OMPD_for, OMPD_parallel_for},
+      {OMPD_parallel_for, OMPD_simd, OMPD_parallel_for_simd},
+      {OMPD_parallel, OMPD_sections, OMPD_parallel_sections}};
   auto Tok = P.getCurToken();
   auto DKind =
       Tok.isAnnotation()
           ? OMPD_unknown
           : getOpenMPDirectiveKind(P.getPreprocessor().getSpelling(Tok));
+  bool TokenMatched = false;
   for (unsigned i = 0; i < llvm::array_lengthof(F); ++i) {
-    if (DKind == F[i][0]) {
+    if (!Tok.isAnnotation() && DKind == OMPD_unknown) {
+      TokenMatched =
+          (i == 0) &&
+          !P.getPreprocessor().getSpelling(Tok).compare("cancellation");
+    } else {
+      TokenMatched = DKind == F[i][0] && DKind != OMPD_unknown;
+    }
+    if (TokenMatched) {
       Tok = P.getPreprocessor().LookAhead(0);
       auto SDKind =
           Tok.isAnnotation()
               ? OMPD_unknown
               : getOpenMPDirectiveKind(P.getPreprocessor().getSpelling(Tok));
-      if (SDKind == F[i][1]) {
+      if (!Tok.isAnnotation() && DKind == OMPD_unknown) {
+        TokenMatched =
+            (i == 0) && !P.getPreprocessor().getSpelling(Tok).compare("point");
+      } else {
+        TokenMatched = SDKind == F[i][1] && SDKind != OMPD_unknown;
+      }
+      if (TokenMatched) {
         P.ConsumeToken();
         DKind = F[i][2];
       }
@@ -110,6 +125,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirective() {
   case OMPD_atomic:
   case OMPD_target:
   case OMPD_teams:
+  case OMPD_cancellation_point:
+  case OMPD_cancel:
     Diag(Tok, diag::err_omp_unexpected_directive)
         << getOpenMPDirectiveName(DKind);
     break;
@@ -145,6 +162,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) {
       Scope::FnScope | Scope::DeclScope | Scope::OpenMPDirectiveScope;
   SourceLocation Loc = ConsumeToken(), EndLoc;
   auto DKind = ParseOpenMPDirectiveKind(*this);
+  OpenMPDirectiveKind CancelRegion = OMPD_unknown;
   // Name of critical directive.
   DeclarationNameInfo DirName;
   StmtResult Directive = StmtError();
@@ -178,6 +196,8 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) {
   case OMPD_taskyield:
   case OMPD_barrier:
   case OMPD_taskwait:
+  case OMPD_cancellation_point:
+  case OMPD_cancel:
     if (!StandAloneAllowed) {
       Diag(Tok, diag::err_omp_immediate_directive)
           << getOpenMPDirectiveName(DKind);
@@ -217,6 +237,10 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) {
         }
         T.consumeClose();
       }
+    } else if (DKind == OMPD_cancellation_point || DKind == OMPD_cancel) {
+      CancelRegion = ParseOpenMPDirectiveKind(*this);
+      if (Tok.isNot(tok::annot_pragma_openmp_end))
+        ConsumeToken();
     }
 
     if (isOpenMPLoopDirective(DKind))
@@ -226,13 +250,13 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) {
     ParseScope OMPDirectiveScope(this, ScopeFlags);
     Actions.StartOpenMPDSABlock(DKind, DirName, Actions.getCurScope(), Loc);
 
-    Actions.StartOpenMPClauses();
     while (Tok.isNot(tok::annot_pragma_openmp_end)) {
       OpenMPClauseKind CKind =
           Tok.isAnnotation()
               ? OMPC_unknown
               : FlushHasClause ? OMPC_flush
                                : getOpenMPClauseKind(PP.getSpelling(Tok));
+      Actions.StartOpenMPClause(CKind);
       FlushHasClause = false;
       OMPClause *Clause =
           ParseOpenMPClause(DKind, CKind, !FirstClauses[CKind].getInt());
@@ -245,8 +269,8 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) {
       // Skip ',' if any.
       if (Tok.is(tok::comma))
         ConsumeToken();
+      Actions.EndOpenMPClause();
     }
-    Actions.EndOpenMPClauses();
     // End location of the directive.
     EndLoc = Tok.getLocation();
     // Consume final annot_pragma_openmp_end.
@@ -267,7 +291,8 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(bool StandAloneAllowed) {
     }
     if (CreateDirective)
       Directive = Actions.ActOnOpenMPExecutableDirective(
-          DKind, DirName, Clauses, AssociatedStmt.get(), Loc, EndLoc);
+          DKind, DirName, CancelRegion, Clauses, AssociatedStmt.get(), Loc,
+          EndLoc);
 
     // Exit scope.
     Actions.EndOpenMPDSABlock(Directive.get());
@@ -453,6 +478,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
   case OMPC_copyin:
   case OMPC_copyprivate:
   case OMPC_flush:
+  case OMPC_depend:
     Clause = ParseOpenMPVarListClause(CKind);
     break;
   case OMPC_unknown:
@@ -674,6 +700,8 @@ static bool ParseReductionId(Parser &P, CXXScopeSpec &ReductionIdScopeSpec,
 ///       'copyprivate' '(' list ')'
 ///    flush-clause:
 ///       'flush' '(' list ')'
+///    depend-clause:
+///       'depend' '(' in | out | inout : list ')'
 ///
 OMPClause *Parser::ParseOpenMPVarListClause(OpenMPClauseKind Kind) {
   SourceLocation Loc = Tok.getLocation();
@@ -683,6 +711,9 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPClauseKind Kind) {
   CXXScopeSpec ReductionIdScopeSpec;
   UnqualifiedId ReductionId;
   bool InvalidReductionId = false;
+  OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown;
+  SourceLocation DepLoc;
+
   // Parse '('.
   BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
   if (T.expectAndConsume(diag::err_expected_lparen_after,
@@ -706,10 +737,30 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPClauseKind Kind) {
     } else {
       Diag(Tok, diag::warn_pragma_expected_colon) << "reduction identifier";
     }
+  } else if (Kind == OMPC_depend) {
+  // Handle dependency type for depend clause.
+    ColonProtectionRAIIObject ColonRAII(*this);
+    DepKind = static_cast<OpenMPDependClauseKind>(getOpenMPSimpleClauseType(
+        Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : ""));
+    DepLoc = Tok.getLocation();
+
+    if (DepKind == OMPC_DEPEND_unknown) {
+      SkipUntil(tok::colon, tok::r_paren, tok::annot_pragma_openmp_end,
+                StopBeforeMatch);
+    } else {
+      ConsumeToken();
+    }
+    if (Tok.is(tok::colon)) {
+      ColonLoc = ConsumeToken();
+    } else {
+      Diag(Tok, diag::warn_pragma_expected_colon) << "dependency type";
+    }
   }
 
   SmallVector<Expr *, 5> Vars;
-  bool IsComma = !InvalidReductionId;
+  bool IsComma = ((Kind != OMPC_reduction) && (Kind != OMPC_depend)) ||
+                 ((Kind == OMPC_reduction) && !InvalidReductionId) ||
+                 ((Kind == OMPC_depend) && DepKind != OMPC_DEPEND_unknown);
   const bool MayHaveTail = (Kind == OMPC_linear || Kind == OMPC_aligned);
   while (IsComma || (Tok.isNot(tok::r_paren) && Tok.isNot(tok::colon) &&
                      Tok.isNot(tok::annot_pragma_openmp_end))) {
@@ -753,13 +804,16 @@ OMPClause *Parser::ParseOpenMPVarListClause(OpenMPClauseKind Kind) {
 
   // Parse ')'.
   T.consumeClose();
-  if (Vars.empty() || (MustHaveTail && !TailExpr) || InvalidReductionId)
+  if ((Kind == OMPC_depend && DepKind != OMPC_DEPEND_unknown && Vars.empty()) ||
+      (Kind != OMPC_depend && Vars.empty()) || (MustHaveTail && !TailExpr) ||
+      InvalidReductionId)
     return nullptr;
 
   return Actions.ActOnOpenMPVarListClause(
       Kind, Vars, TailExpr, Loc, LOpen, ColonLoc, Tok.getLocation(),
       ReductionIdScopeSpec,
       ReductionId.isValid() ? Actions.GetNameFromUnqualifiedId(ReductionId)
-                            : DeclarationNameInfo());
+                            : DeclarationNameInfo(),
+      DepKind, DepLoc);
 }
 
diff --git a/lib/Parse/ParseTemplate.cpp b/lib/Parse/ParseTemplate.cpp
index a811678..2a9becb 100644
--- a/lib/Parse/ParseTemplate.cpp
+++ b/lib/Parse/ParseTemplate.cpp
@@ -116,7 +116,7 @@ Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context,
     SmallVector<Decl*, 4> TemplateParams;
     if (ParseTemplateParameters(CurTemplateDepthTracker.getDepth(),
                                 TemplateParams, LAngleLoc, RAngleLoc)) {
-      // Skip until the semi-colon or a }.
+      // Skip until the semi-colon or a '}'.
       SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch);
       TryConsumeToken(tok::semi);
       return nullptr;
@@ -132,6 +132,17 @@ Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context,
     if (!TemplateParams.empty()) {
       isSpecialization = false;
       ++CurTemplateDepthTracker;
+
+      if (TryConsumeToken(tok::kw_requires)) {
+        ExprResult ER =
+            Actions.CorrectDelayedTyposInExpr(ParseConstraintExpression());
+        if (!ER.isUsable()) {
+          // Skip until the semi-colon or a '}'.
+          SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch);
+          TryConsumeToken(tok::semi);
+          return nullptr;
+        }
+      }
     } else {
       LastParamListWasEmpty = true;
     }
diff --git a/lib/Parse/ParseTentative.cpp b/lib/Parse/ParseTentative.cpp
index d63cf24..368cb93 100644
--- a/lib/Parse/ParseTentative.cpp
+++ b/lib/Parse/ParseTentative.cpp
@@ -632,8 +632,8 @@ Parser::TPResult Parser::TryParsePtrOperatorSeq() {
       // ptr-operator
       ConsumeToken();
       while (Tok.isOneOf(tok::kw_const, tok::kw_volatile, tok::kw_restrict,
-                         tok::kw___nonnull, tok::kw___nullable,
-                         tok::kw___null_unspecified))
+                         tok::kw__Nonnull, tok::kw__Nullable,
+                         tok::kw__Null_unspecified))
         ConsumeToken();
     } else {
       return TPResult::True;
@@ -1213,9 +1213,11 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
     //   'friend'
     //   'typedef'
     //   'constexpr'
+    //   'concept'
   case tok::kw_friend:
   case tok::kw_typedef:
   case tok::kw_constexpr:
+  case tok::kw_concept:
     // storage-class-specifier
   case tok::kw_register:
   case tok::kw_static:
@@ -1276,9 +1278,9 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
   case tok::kw___ptr32:
   case tok::kw___forceinline:
   case tok::kw___unaligned:
-  case tok::kw___nonnull:
-  case tok::kw___nullable:
-  case tok::kw___null_unspecified:
+  case tok::kw__Nonnull:
+  case tok::kw__Nullable:
+  case tok::kw__Null_unspecified:
     return TPResult::True;
 
     // Borland
diff --git a/lib/Sema/AnalysisBasedWarnings.cpp b/lib/Sema/AnalysisBasedWarnings.cpp
index 36030b9..f2ff48a 100644
--- a/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/lib/Sema/AnalysisBasedWarnings.cpp
@@ -130,11 +130,10 @@ public:
       return true;
 
     // Recurse to children.
-    for (ConstStmtRange SubStmts = E->children(); SubStmts; ++SubStmts)
-      if (*SubStmts)
-        if (const Expr *SubExpr = dyn_cast<Expr>(*SubStmts))
-          if (HasMacroID(SubExpr))
-            return true;
+    for (const Stmt *SubStmt : E->children())
+      if (const Expr *SubExpr = dyn_cast_or_null<Expr>(SubStmt))
+        if (HasMacroID(SubExpr))
+          return true;
 
     return false;
   }
diff --git a/lib/Sema/DeclSpec.cpp b/lib/Sema/DeclSpec.cpp
index 1e7fc75..d5c8871 100644
--- a/lib/Sema/DeclSpec.cpp
+++ b/lib/Sema/DeclSpec.cpp
@@ -893,6 +893,18 @@ bool DeclSpec::SetConstexprSpec(SourceLocation Loc, const char *&PrevSpec,
   return false;
 }
 
+bool DeclSpec::SetConceptSpec(SourceLocation Loc, const char *&PrevSpec,
+                              unsigned &DiagID) {
+  if (Concept_specified) {
+    DiagID = diag::ext_duplicate_declspec;
+    PrevSpec = "concept";
+    return true;
+  }
+  Concept_specified = true;
+  ConceptLoc = Loc;
+  return false;
+}
+
 void DeclSpec::setProtocolQualifiers(Decl * const *Protos,
                                      unsigned NP,
                                      SourceLocation *ProtoLocs,
diff --git a/lib/Sema/JumpDiagnostics.cpp b/lib/Sema/JumpDiagnostics.cpp
index 6b9eb2a..775fe85 100644
--- a/lib/Sema/JumpDiagnostics.cpp
+++ b/lib/Sema/JumpDiagnostics.cpp
@@ -372,13 +372,12 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S, unsigned &origParentScope)
     break;
   }
 
-  for (Stmt::child_range CI = S->children(); CI; ++CI) {
+  for (Stmt *SubStmt : S->children()) {
     if (SkipFirstSubStmt) {
       SkipFirstSubStmt = false;
       continue;
     }
 
-    Stmt *SubStmt = *CI;
     if (!SubStmt) continue;
 
     // Cases, labels, and defaults aren't "scope parents".  It's also
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp
index db1251f..18d7e9d 100644
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -91,8 +91,9 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
     LateTemplateParserCleanup(nullptr),
     OpaqueParser(nullptr), IdResolver(pp), StdInitializerList(nullptr),
     CXXTypeInfoDecl(nullptr), MSVCGuidDecl(nullptr),
-    NSNumberDecl(nullptr),
+    NSNumberDecl(nullptr), NSValueDecl(nullptr),
     NSStringDecl(nullptr), StringWithUTF8StringMethod(nullptr),
+    ValueWithBytesObjCTypeMethod(nullptr),
     NSArrayDecl(nullptr), ArrayWithObjectsMethod(nullptr),
     NSDictionaryDecl(nullptr), DictionaryWithObjectsMethod(nullptr),
     MSAsmLabelNameCounter(0),
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index f76727c..bdfff80 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -1031,6 +1031,8 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   unsigned i = 0, l = 0, u = 0;
   switch (BuiltinID) {
   default: return false;
+  case X86::BI__builtin_cpu_supports:
+    return SemaBuiltinCpuSupports(TheCall);
   case X86::BI_mm_prefetch: i = 1; l = 0; u = 3; break;
   case X86::BI__builtin_ia32_sha1rnds4: i = 2, l = 0; u = 3; break;
   case X86::BI__builtin_ia32_vpermil2pd:
@@ -2782,6 +2784,26 @@ bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
   return false;
 }
 
+/// SemaBuiltinCpuSupports - Handle __builtin_cpu_supports(char *).
+/// This checks that the target supports __builtin_cpu_supports and
+/// that the string argument is constant and valid.
+bool Sema::SemaBuiltinCpuSupports(CallExpr *TheCall) {
+  Expr *Arg = TheCall->getArg(0);
+
+  // Check if the argument is a string literal.
+  if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts()))
+    return Diag(TheCall->getLocStart(), diag::err_expr_not_string_literal)
+           << Arg->getSourceRange();
+
+  // Check the contents of the string.
+  StringRef Feature =
+      cast<StringLiteral>(Arg->IgnoreParenImpCasts())->getString();
+  if (!Context.getTargetInfo().validateCpuSupports(Feature))
+    return Diag(TheCall->getLocStart(), diag::err_invalid_cpu_supports)
+           << Arg->getSourceRange();
+  return false;
+}
+
 /// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val).
 /// This checks that the target supports __builtin_longjmp and
 /// that val is a constant 1.
@@ -3550,8 +3572,18 @@ public:
                          const char *startSpecifier, unsigned specifierLen);
   bool checkForCStrMembers(const analyze_printf::ArgType &AT,
                            const Expr *E);
+                           
+  void HandleEmptyObjCModifierFlag(const char *startFlag,
+                                   unsigned flagLen) override;
 
-};  
+  void HandleInvalidObjCModifierFlag(const char *startFlag,
+                                            unsigned flagLen) override;
+
+  void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
+                                           const char *flagsEnd,
+                                           const char *conversionPosition) 
+                                             override;
+};
 }
 
 bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
@@ -3671,6 +3703,41 @@ void CheckPrintfHandler::HandleIgnoredFlag(
                          getSpecifierRange(ignoredFlag.getPosition(), 1)));
 }
 
+//  void EmitFormatDiagnostic(PartialDiagnostic PDiag, SourceLocation StringLoc,
+//                            bool IsStringLocation, Range StringRange,
+//                            ArrayRef<FixItHint> Fixit = None);
+                            
+void CheckPrintfHandler::HandleEmptyObjCModifierFlag(const char *startFlag,
+                                                     unsigned flagLen) {
+  // Warn about an empty flag.
+  EmitFormatDiagnostic(S.PDiag(diag::warn_printf_empty_objc_flag),
+                       getLocationOfByte(startFlag),
+                       /*IsStringLocation*/true,
+                       getSpecifierRange(startFlag, flagLen));
+}
+
+void CheckPrintfHandler::HandleInvalidObjCModifierFlag(const char *startFlag,
+                                                       unsigned flagLen) {
+  // Warn about an invalid flag.
+  auto Range = getSpecifierRange(startFlag, flagLen);
+  StringRef flag(startFlag, flagLen);
+  EmitFormatDiagnostic(S.PDiag(diag::warn_printf_invalid_objc_flag) << flag,
+                      getLocationOfByte(startFlag),
+                      /*IsStringLocation*/true,
+                      Range, FixItHint::CreateRemoval(Range));
+}
+
+void CheckPrintfHandler::HandleObjCFlagsWithNonObjCConversion(
+    const char *flagsStart, const char *flagsEnd, const char *conversionPosition) {
+    // Warn about using '[...]' without a '@' conversion.
+    auto Range = getSpecifierRange(flagsStart, flagsEnd - flagsStart + 1);
+    auto diag = diag::warn_printf_ObjCflags_without_ObjCConversion;
+    EmitFormatDiagnostic(S.PDiag(diag) << StringRef(conversionPosition, 1),
+                         getLocationOfByte(conversionPosition),
+                         /*IsStringLocation*/true,
+                         Range, FixItHint::CreateRemoval(Range));
+}
+
 // Determines if the specified is a C++ class or struct containing
 // a member with the specified name and kind (e.g. a CXXMethodDecl named
 // "c_str()").
@@ -7173,8 +7240,8 @@ void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC) {
   CC = E->getExprLoc();
   BinaryOperator *BO = dyn_cast<BinaryOperator>(E);
   bool IsLogicalAndOperator = BO && BO->getOpcode() == BO_LAnd;
-  for (Stmt::child_range I = E->children(); I; ++I) {
-    Expr *ChildExpr = dyn_cast_or_null<Expr>(*I);
+  for (Stmt *SubStmt : E->children()) {
+    Expr *ChildExpr = dyn_cast_or_null<Expr>(SubStmt);
     if (!ChildExpr)
       continue;
 
diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp
index ebb6bbc..ff6a3ee 100644
--- a/lib/Sema/SemaCodeComplete.cpp
+++ b/lib/Sema/SemaCodeComplete.cpp
@@ -1343,9 +1343,9 @@ static void AddTypeSpecifierResults(const LangOptions &LangOpts,
   }
 
   // Nullability
-  Results.AddResult(Result("__nonnull", CCP_Type));
-  Results.AddResult(Result("__null_unspecified", CCP_Type));
-  Results.AddResult(Result("__nullable", CCP_Type));
+  Results.AddResult(Result("_Nonnull", CCP_Type));
+  Results.AddResult(Result("_Null_unspecified", CCP_Type));
+  Results.AddResult(Result("_Nullable", CCP_Type));
 }
 
 static void AddStorageSpecifiers(Sema::ParserCompletionContext CCC,
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index aa006b3..6508d6f 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -1804,7 +1804,8 @@ static void filterNonConflictingPreviousDecls(Sema &S,
                                               NamedDecl *decl,
                                               LookupResult &previous){
   // This is only interesting when modules are enabled.
-  if (!S.getLangOpts().Modules && !S.getLangOpts().ModulesLocalVisibility)
+  if ((!S.getLangOpts().Modules && !S.getLangOpts().ModulesLocalVisibility) ||
+      !S.getLangOpts().ModulesHideInternalLinkage)
     return;
 
   // Empty sets are uninteresting.
@@ -2471,17 +2472,18 @@ static void mergeParamDeclTypes(ParmVarDecl *NewParam,
   if (auto Oldnullability = OldParam->getType()->getNullability(S.Context)) {
     if (auto Newnullability = NewParam->getType()->getNullability(S.Context)) {
       if (*Oldnullability != *Newnullability) {
-        unsigned unsNewnullability = static_cast<unsigned>(*Newnullability);
-        unsigned unsOldnullability = static_cast<unsigned>(*Oldnullability);
         S.Diag(NewParam->getLocation(), diag::warn_mismatched_nullability_attr)
-          << unsNewnullability
-          << ((NewParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0)
-          << unsOldnullability
-          << ((OldParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0);
+          << DiagNullabilityKind(
+               *Newnullability,
+               ((NewParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
+                != 0))
+          << DiagNullabilityKind(
+               *Oldnullability,
+               ((OldParam->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
+                != 0));
         S.Diag(OldParam->getLocation(), diag::note_previous_declaration);
       }
-    }
-    else {
+    } else {
       QualType NewT = NewParam->getType();
       NewT = S.Context.getAttributedType(
                          AttributedType::getNullabilityAttrKind(*Oldnullability),
@@ -3454,8 +3456,9 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
       New->isThisDeclarationADefinition() == VarDecl::Definition &&
       (Def = Old->getDefinition())) {
     NamedDecl *Hidden = nullptr;
-    if (!hasVisibleDefinition(Def, &Hidden) && 
-        (New->getDescribedVarTemplate() ||
+    if (!hasVisibleDefinition(Def, &Hidden) &&
+        (New->getFormalLinkage() == InternalLinkage ||
+         New->getDescribedVarTemplate() ||
          New->getNumTemplateParameterLists() ||
          New->getDeclContext()->isDependentContext())) {
       // The previous definition is hidden, and multiple definitions are
@@ -3577,6 +3580,23 @@ void Sema::setTagNameForLinkagePurposes(TagDecl *TagFromDeclSpec,
   TagFromDeclSpec->setTypedefNameForAnonDecl(NewTD);
 }
 
+static unsigned GetDiagnosticTypeSpecifierID(DeclSpec::TST T) {
+  switch (T) {
+  case DeclSpec::TST_class:
+    return 0;
+  case DeclSpec::TST_struct:
+    return 1;
+  case DeclSpec::TST_interface:
+    return 2;
+  case DeclSpec::TST_union:
+    return 3;
+  case DeclSpec::TST_enum:
+    return 4;
+  default:
+    llvm_unreachable("unexpected type specifier");
+  }
+}
+
 /// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with
 /// no declarator (e.g. "struct foo;") is parsed. It also accepts template
 /// parameters to cope with template friend declarations.
@@ -3626,10 +3646,7 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
     // and definitions of functions and variables.
     if (Tag)
       Diag(DS.getConstexprSpecLoc(), diag::err_constexpr_tag)
-        << (DS.getTypeSpecType() == DeclSpec::TST_class ? 0 :
-            DS.getTypeSpecType() == DeclSpec::TST_struct ? 1 :
-            DS.getTypeSpecType() == DeclSpec::TST_interface ? 2 :
-            DS.getTypeSpecType() == DeclSpec::TST_union ? 3 : 4);
+          << GetDiagnosticTypeSpecifierID(DS.getTypeSpecType());
     else
       Diag(DS.getConstexprSpecLoc(), diag::err_constexpr_no_declarators);
     // Don't emit warnings after this error.
@@ -3656,11 +3673,7 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
     // or an explicit specialization.
     // Per C++ [dcl.enum]p1, an opaque-enum-declaration can't either.
     Diag(SS.getBeginLoc(), diag::err_standalone_class_nested_name_specifier)
-      << (DS.getTypeSpecType() == DeclSpec::TST_class ? 0 :
-          DS.getTypeSpecType() == DeclSpec::TST_struct ? 1 :
-          DS.getTypeSpecType() == DeclSpec::TST_interface ? 2 :
-          DS.getTypeSpecType() == DeclSpec::TST_union ? 3 : 4)
-      << SS.getRange();
+        << GetDiagnosticTypeSpecifierID(DS.getTypeSpecType()) << SS.getRange();
     return nullptr;
   }
 
@@ -3808,16 +3821,10 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS,
         TypeSpecType == DeclSpec::TST_interface ||
         TypeSpecType == DeclSpec::TST_union ||
         TypeSpecType == DeclSpec::TST_enum) {
-      AttributeList* attrs = DS.getAttributes().getList();
-      while (attrs) {
+      for (AttributeList* attrs = DS.getAttributes().getList(); attrs;
+           attrs = attrs->getNext())
         Diag(attrs->getLoc(), diag::warn_declspec_attribute_ignored)
-        << attrs->getName()
-        << (TypeSpecType == DeclSpec::TST_class ? 0 :
-            TypeSpecType == DeclSpec::TST_struct ? 1 :
-            TypeSpecType == DeclSpec::TST_union ? 2 :
-            TypeSpecType == DeclSpec::TST_interface ? 3 : 4);
-        attrs = attrs->getNext();
-      }
+            << attrs->getName() << GetDiagnosticTypeSpecifierID(TypeSpecType);
     }
   }
 
@@ -5518,6 +5525,19 @@ bool Sema::adjustContextForLocalExternDecl(DeclContext *&DC) {
   return true;
 }
 
+/// \brief Returns true if given declaration is TU-scoped and externally
+/// visible.
+static bool isDeclTUScopedExternallyVisible(const Decl *D) {
+  if (auto *FD = dyn_cast<FunctionDecl>(D))
+    return (FD->getDeclContext()->isTranslationUnit() || FD->isExternC()) &&
+           FD->hasExternalFormalLinkage();
+  else if (auto *VD = dyn_cast<VarDecl>(D))
+    return (VD->getDeclContext()->isTranslationUnit() || VD->isExternC()) &&
+           VD->hasExternalFormalLinkage();
+
+  llvm_unreachable("Unknown type of decl!");
+}
+
 NamedDecl *
 Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
                               TypeSourceInfo *TInfo, LookupResult &Previous,
@@ -5942,7 +5962,8 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC,
 
     NewVD->addAttr(::new (Context) AsmLabelAttr(SE->getStrTokenLoc(0),
                                                 Context, Label, 0));
-  } else if (!ExtnameUndeclaredIdentifiers.empty()) {
+  } else if (!ExtnameUndeclaredIdentifiers.empty() &&
+             isDeclTUScopedExternallyVisible(NewVD)) {
     llvm::DenseMap<IdentifierInfo*,AsmLabelAttr*>::iterator I =
       ExtnameUndeclaredIdentifiers.find(NewVD->getIdentifier());
     if (I != ExtnameUndeclaredIdentifiers.end()) {
@@ -7181,6 +7202,11 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
           dyn_cast<CXXRecordDecl>(NewFD->getDeclContext())) {
       if (Parent->isInterface() && cast<CXXMethodDecl>(NewFD)->isUserProvided())
         NewFD->setPure(true);
+
+      // C++ [class.union]p2
+      //   A union can have member functions, but not virtual functions.
+      if (isVirtual && Parent->isUnion())
+        Diag(D.getDeclSpec().getVirtualSpecLoc(), diag::err_virtual_in_union);
     }
 
     SetNestedNameSpecifier(NewFD, D);
@@ -7464,7 +7490,8 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     StringLiteral *SE = cast<StringLiteral>(E);
     NewFD->addAttr(::new (Context) AsmLabelAttr(SE->getStrTokenLoc(0), Context,
                                                 SE->getString(), 0));
-  } else if (!ExtnameUndeclaredIdentifiers.empty()) {
+  } else if (!ExtnameUndeclaredIdentifiers.empty() &&
+             isDeclTUScopedExternallyVisible(NewFD)) {
     llvm::DenseMap<IdentifierInfo*,AsmLabelAttr*>::iterator I =
       ExtnameUndeclaredIdentifiers.find(NewFD->getIdentifier());
     if (I != ExtnameUndeclaredIdentifiers.end()) {
@@ -8769,18 +8796,10 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
   }
 
   if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(RealDecl)) {
-    // With declarators parsed the way they are, the parser cannot
-    // distinguish between a normal initializer and a pure-specifier.
-    // Thus this grotesque test.
-    IntegerLiteral *IL;
-    if ((IL = dyn_cast<IntegerLiteral>(Init)) && IL->getValue() == 0 &&
-        Context.getCanonicalType(IL->getType()) == Context.IntTy)
-      CheckPureMethod(Method, Init->getSourceRange());
-    else {
-      Diag(Method->getLocation(), diag::err_member_function_initialization)
-        << Method->getDeclName() << Init->getSourceRange();
-      Method->setInvalidDecl();
-    }
+    // Pure-specifiers are handled in ActOnPureSpecifier.
+    Diag(Method->getLocation(), diag::err_member_function_initialization)
+      << Method->getDeclName() << Init->getSourceRange();
+    Method->setInvalidDecl();
     return;
   }
 
@@ -8943,7 +8962,8 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init,
   if ((Def = VDecl->getDefinition()) && Def != VDecl) {
     NamedDecl *Hidden = nullptr;
     if (!hasVisibleDefinition(Def, &Hidden) && 
-        (VDecl->getDescribedVarTemplate() ||
+        (VDecl->getFormalLinkage() == InternalLinkage ||
+         VDecl->getDescribedVarTemplate() ||
          VDecl->getNumTemplateParameterLists() ||
          VDecl->getDeclContext()->isDependentContext())) {
       // The previous definition is hidden, and multiple definitions are
@@ -10364,7 +10384,8 @@ Sema::CheckForFunctionRedefinition(FunctionDecl *FD,
   // in this case? That may be necessary for functions that return local types
   // through a deduced return type, or instantiate templates with local types.
   if (!hasVisibleDefinition(Definition) &&
-      (Definition->isInlineSpecified() ||
+      (Definition->getFormalLinkage() == InternalLinkage ||
+       Definition->isInlined() ||
        Definition->getDescribedFunctionTemplate() ||
        Definition->getNumTemplateParameterLists()))
     return;
diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp
index 43790c2..b8d0830 100644
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/Mangle.h"
+#include "clang/AST/ASTMutationListener.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
@@ -3990,6 +3991,34 @@ static void handleObjCRuntimeName(Sema &S, Decl *D,
                                  Attr.getAttributeSpellingListIndex()));
 }
 
+// when a user wants to use objc_boxable with a union or struct
+// but she doesn't have access to the declaration (legacy/third-party code)
+// then she can 'enable' this feature via trick with a typedef
+// e.g.:
+// typedef struct __attribute((objc_boxable)) legacy_struct legacy_struct;
+static void handleObjCBoxable(Sema &S, Decl *D, const AttributeList &Attr) {
+  bool notify = false;
+
+  RecordDecl *RD = dyn_cast<RecordDecl>(D);
+  if (RD && RD->getDefinition()) {
+    RD = RD->getDefinition();
+    notify = true;
+  }
+
+  if (RD) {
+    ObjCBoxableAttr *BoxableAttr = ::new (S.Context)
+                          ObjCBoxableAttr(Attr.getRange(), S.Context,
+                                          Attr.getAttributeSpellingListIndex());
+    RD->addAttr(BoxableAttr);
+    if (notify) {
+      // we need to notify ASTReader/ASTWriter about
+      // modification of existing declaration
+      if (ASTMutationListener *L = S.getASTMutationListener())
+        L->AddedAttributeToRecord(BoxableAttr, RD);
+    }
+  }
+}
+
 static void handleObjCOwnershipAttr(Sema &S, Decl *D,
                                     const AttributeList &Attr) {
   if (hasDeclarator(D)) return;
@@ -4758,6 +4787,10 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
   case AttributeList::AT_ObjCRuntimeName:
     handleObjCRuntimeName(S, D, Attr);
     break;
+
+  case AttributeList::AT_ObjCBoxable:
+    handleObjCBoxable(S, D, Attr);
+    break;
           
   case AttributeList::AT_CFAuditedTransfer:
     handleCFAuditedTransferAttr(S, D, Attr);
diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp
index 7ed9bfc..0d7cbf4 100644
--- a/lib/Sema/SemaDeclCXX.cpp
+++ b/lib/Sema/SemaDeclCXX.cpp
@@ -73,8 +73,8 @@ namespace {
   /// VisitExpr - Visit all of the children of this expression.
   bool CheckDefaultArgumentVisitor::VisitExpr(Expr *Node) {
     bool IsInvalid = false;
-    for (Stmt::child_range I = Node->children(); I; ++I)
-      IsInvalid |= Visit(*I);
+    for (Stmt *SubStmt : Node->children())
+      IsInvalid |= Visit(SubStmt);
     return IsInvalid;
   }
 
@@ -1091,9 +1091,9 @@ CheckConstexprFunctionStmt(Sema &SemaRef, const FunctionDecl *Dcl, Stmt *S,
       break;
     if (!Cxx1yLoc.isValid())
       Cxx1yLoc = S->getLocStart();
-    for (Stmt::child_range Children = S->children(); Children; ++Children)
-      if (*Children &&
-          !CheckConstexprFunctionStmt(SemaRef, Dcl, *Children, ReturnStmts,
+    for (Stmt *SubStmt : S->children())
+      if (SubStmt &&
+          !CheckConstexprFunctionStmt(SemaRef, Dcl, SubStmt, ReturnStmts,
                                       Cxx1yLoc))
         return false;
     return true;
@@ -1106,9 +1106,9 @@ CheckConstexprFunctionStmt(Sema &SemaRef, const FunctionDecl *Dcl, Stmt *S,
     // mutation, we can reasonably allow them in C++11 as an extension.
     if (!Cxx1yLoc.isValid())
       Cxx1yLoc = S->getLocStart();
-    for (Stmt::child_range Children = S->children(); Children; ++Children)
-      if (*Children &&
-          !CheckConstexprFunctionStmt(SemaRef, Dcl, *Children, ReturnStmts,
+    for (Stmt *SubStmt : S->children())
+      if (SubStmt &&
+          !CheckConstexprFunctionStmt(SemaRef, Dcl, SubStmt, ReturnStmts,
                                       Cxx1yLoc))
         return false;
     return true;
@@ -2186,9 +2186,6 @@ Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D,
       assert(Member && "HandleField never returns null");
     }
   } else {
-    assert(InitStyle == ICIS_NoInit ||
-           D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static);
-
     Member = HandleDeclarator(S, D, TemplateParameterLists);
     if (!Member)
       return nullptr;
@@ -12890,8 +12887,7 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) {
 }
 
 static void SearchForReturnInStmt(Sema &Self, Stmt *S) {
-  for (Stmt::child_range CI = S->children(); CI; ++CI) {
-    Stmt *SubStmt = *CI;
+  for (Stmt *SubStmt : S->children()) {
     if (!SubStmt)
       continue;
     if (isa<ReturnStmt>(SubStmt))
@@ -13060,6 +13056,15 @@ bool Sema::CheckPureMethod(CXXMethodDecl *Method, SourceRange InitRange) {
   return true;
 }
 
+void Sema::ActOnPureSpecifier(Decl *D, SourceLocation ZeroLoc) {
+  if (D->getFriendObjectKind())
+    Diag(D->getLocation(), diag::err_pure_friend);
+  else if (auto *M = dyn_cast<CXXMethodDecl>(D))
+    CheckPureMethod(M, ZeroLoc);
+  else
+    Diag(D->getLocation(), diag::err_illegal_initializer);
+}
+
 /// \brief Determine whether the given declaration is a static data member.
 static bool isStaticDataMember(const Decl *D) {
   if (const VarDecl *Var = dyn_cast_or_null<VarDecl>(D))
diff --git a/lib/Sema/SemaDeclObjC.cpp b/lib/Sema/SemaDeclObjC.cpp
index 543566f..d0b2998 100644
--- a/lib/Sema/SemaDeclObjC.cpp
+++ b/lib/Sema/SemaDeclObjC.cpp
@@ -1400,16 +1400,20 @@ static bool CheckMethodOverrideReturn(Sema &S,
       !S.Context.hasSameNullabilityTypeQualifier(MethodImpl->getReturnType(),
                                                  MethodDecl->getReturnType(),
                                                  false)) {
-    unsigned unsNullabilityMethodImpl =
-      static_cast<unsigned>(*MethodImpl->getReturnType()->getNullability(S.Context));
-    unsigned unsNullabilityMethodDecl =
-      static_cast<unsigned>(*MethodDecl->getReturnType()->getNullability(S.Context));
+    auto nullabilityMethodImpl =
+      *MethodImpl->getReturnType()->getNullability(S.Context);
+    auto nullabilityMethodDecl =
+      *MethodDecl->getReturnType()->getNullability(S.Context);
       S.Diag(MethodImpl->getLocation(),
              diag::warn_conflicting_nullability_attr_overriding_ret_types)
-        << unsNullabilityMethodImpl
-        << ((MethodImpl->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0)
-        << unsNullabilityMethodDecl
-        << ((MethodDecl->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0);
+        << DiagNullabilityKind(
+             nullabilityMethodImpl,
+             ((MethodImpl->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
+              != 0))
+        << DiagNullabilityKind(
+             nullabilityMethodDecl,
+             ((MethodDecl->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
+                != 0));
       S.Diag(MethodDecl->getLocation(), diag::note_previous_declaration);
   }
     
@@ -1486,15 +1490,17 @@ static bool CheckMethodOverrideParam(Sema &S,
   if (Warn && IsOverridingMode &&
       !isa<ObjCImplementationDecl>(MethodImpl->getDeclContext()) &&
       !S.Context.hasSameNullabilityTypeQualifier(ImplTy, IfaceTy, true)) {
-    unsigned unsImplTy = static_cast<unsigned>(*ImplTy->getNullability(S.Context));
-    unsigned unsIfaceTy = static_cast<unsigned>(*IfaceTy->getNullability(S.Context));
     S.Diag(ImplVar->getLocation(),
            diag::warn_conflicting_nullability_attr_overriding_param_types)
-        << unsImplTy
-        << ((ImplVar->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0)
-        << unsIfaceTy
-        << ((IfaceVar->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability) != 0);
-        S.Diag(IfaceVar->getLocation(), diag::note_previous_declaration);
+      << DiagNullabilityKind(
+           *ImplTy->getNullability(S.Context),
+           ((ImplVar->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
+            != 0))
+      << DiagNullabilityKind(
+           *IfaceTy->getNullability(S.Context),
+           ((IfaceVar->getObjCDeclQualifier() & Decl::OBJC_TQ_CSNullability)
+            != 0));
+    S.Diag(IfaceVar->getLocation(), diag::note_previous_declaration);
   }
   if (S.Context.hasSameUnqualifiedType(ImplTy, IfaceTy))
     return true;
@@ -3184,8 +3190,8 @@ static QualType mergeTypeNullabilityForRedecl(Sema &S, SourceLocation loc,
 
     // Complain about mismatched nullability.
     S.Diag(loc, diag::err_nullability_conflicting)
-      << static_cast<unsigned>(*nullability) << usesCSKeyword
-      << static_cast<unsigned>(*prevNullability) << prevUsesCSKeyword;
+      << DiagNullabilityKind(*nullability, usesCSKeyword)
+      << DiagNullabilityKind(*prevNullability, prevUsesCSKeyword);
     return type;
   }
 
diff --git a/lib/Sema/SemaExceptionSpec.cpp b/lib/Sema/SemaExceptionSpec.cpp
index f3bcf76..2e3e63e 100644
--- a/lib/Sema/SemaExceptionSpec.cpp
+++ b/lib/Sema/SemaExceptionSpec.cpp
@@ -837,11 +837,13 @@ bool Sema::CheckOverridingFunctionExceptionSpec(const CXXMethodDecl *New,
                                   New->getLocation());
 }
 
-static CanThrowResult canSubExprsThrow(Sema &S, const Expr *CE) {
-  Expr *E = const_cast<Expr*>(CE);
+static CanThrowResult canSubExprsThrow(Sema &S, const Expr *E) {
   CanThrowResult R = CT_Cannot;
-  for (Expr::child_range I = E->children(); I && R != CT_Can; ++I)
-    R = mergeCanThrow(R, S.canThrow(cast<Expr>(*I)));
+  for (const Stmt *SubStmt : E->children()) {
+    R = mergeCanThrow(R, S.canThrow(cast<Expr>(SubStmt)));
+    if (R == CT_Can)
+      break;
+  }
   return R;
 }
 
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index b0bc231..c023c85 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -3689,7 +3689,7 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType,
   // C11 6.5.3.4/3, C++11 [expr.alignof]p3:
   //   When alignof or _Alignof is applied to an array type, the result
   //   is the alignment of the element type.
-  if (ExprKind == UETT_AlignOf)
+  if (ExprKind == UETT_AlignOf || ExprKind == UETT_OpenMPRequiredSimdAlign)
     ExprType = Context.getBaseElementType(ExprType);
 
   if (ExprKind == UETT_VecStep)
@@ -3824,6 +3824,9 @@ Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc,
     isInvalid = CheckAlignOfExpr(*this, E);
   } else if (ExprKind == UETT_VecStep) {
     isInvalid = CheckVecStepExpr(E);
+  } else if (ExprKind == UETT_OpenMPRequiredSimdAlign) {
+      Diag(E->getExprLoc(), diag::err_openmp_default_simd_align_expr);
+      isInvalid = true;
   } else if (E->refersToBitField()) {  // C99 6.5.3.4p1.
     Diag(E->getExprLoc(), diag::err_sizeof_alignof_bitfield) << 0;
     isInvalid = true;
@@ -4314,7 +4317,6 @@ Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn,
                               SourceLocation RParenLoc,
                               bool IsExecConfig) {
   // Bail out early if calling a builtin with custom typechecking.
-  // We don't need to do this in the 
   if (FDecl)
     if (unsigned ID = FDecl->getBuiltinID())
       if (Context.BuiltinInfo.hasCustomTypechecking(ID))
@@ -4938,6 +4940,17 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
     TheCall = new (Context) CallExpr(Context, Fn, Args, Context.BoolTy,
                                      VK_RValue, RParenLoc);
 
+  if (!getLangOpts().CPlusPlus) {
+    // C cannot always handle TypoExpr nodes in builtin calls and direct
+    // function calls as their argument checking don't necessarily handle
+    // dependent types properly, so make sure any TypoExprs have been
+    // dealt with.
+    ExprResult Result = CorrectDelayedTyposInExpr(TheCall);
+    if (!Result.isUsable()) return ExprError();
+    TheCall = dyn_cast<CallExpr>(Result.get());
+    if (!TheCall) return Result;
+  }
+
   // Bail out early if calling a builtin with custom typechecking.
   if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID))
     return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall);
@@ -12742,6 +12755,7 @@ bool Sema::tryCaptureVariable(
   bool Nested = false;
   bool Explicit = (Kind != TryCapture_Implicit);
   unsigned FunctionScopesIndex = MaxFunctionScopesIndex;
+  unsigned OpenMPLevel = 0;
   do {
     // Only block literals, captured statements, and lambda expressions can
     // capture; other scopes don't work.
@@ -12768,6 +12782,20 @@ bool Sema::tryCaptureVariable(
     if (isVariableAlreadyCapturedInScopeInfo(CSI, Var, Nested, CaptureType, 
                                              DeclRefType)) 
       break;
+    if (getLangOpts().OpenMP) {
+      if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
+        // OpenMP private variables should not be captured in outer scope, so
+        // just break here.
+        if (RSI->CapRegionKind == CR_OpenMP) {
+          if (isOpenMPPrivateVar(Var, OpenMPLevel)) {
+            Nested = true;
+            CaptureType = Context.getLValueReferenceType(DeclRefType);
+            break;
+          }
+          ++OpenMPLevel;
+        }
+      }
+    }
     // If we are instantiating a generic lambda call operator body, 
     // we do not want to capture new variables.  What was captured
     // during either a lambdas transformation or initial parsing
diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp
index 6c839f3..6608d7c 100644
--- a/lib/Sema/SemaExprCXX.cpp
+++ b/lib/Sema/SemaExprCXX.cpp
@@ -6506,6 +6506,11 @@ public:
     // with the same edit length that pass all the checks and filters.
     // TODO: Properly handle various permutations of possible corrections when
     // there is more than one potentially ambiguous typo correction.
+    // Also, disable typo correction while attempting the transform when
+    // handling potentially ambiguous typo corrections as any new TypoExprs will
+    // have been introduced by the application of one of the correction
+    // candidates and add little to no value if corrected.
+    SemaRef.DisableTypoCorrection = true;
     while (!AmbiguousTypoExprs.empty()) {
       auto TE  = AmbiguousTypoExprs.back();
       auto Cached = TransformCache[TE];
@@ -6522,6 +6527,7 @@ public:
       State.Consumer->restoreSavedPosition();
       TransformCache[TE] = Cached;
     }
+    SemaRef.DisableTypoCorrection = false;
 
     // Ensure that all of the TypoExprs within the current Expr have been found.
     if (!Res.isUsable())
diff --git a/lib/Sema/SemaExprObjC.cpp b/lib/Sema/SemaExprObjC.cpp
index 9947fad..c52b6f5 100644
--- a/lib/Sema/SemaExprObjC.cpp
+++ b/lib/Sema/SemaExprObjC.cpp
@@ -563,7 +563,6 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
     // Look for the appropriate method within NSNumber.
     BoxingMethod = getNSNumberFactoryMethod(*this, SR.getBegin(), ValueType);
     BoxedType = NSNumberPointer;
-
   } else if (const EnumType *ET = ValueType->getAs<EnumType>()) {
     if (!ET->getDecl()->isComplete()) {
       Diag(SR.getBegin(), diag::err_objc_incomplete_boxed_expression_type)
@@ -574,6 +573,109 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
     BoxingMethod = getNSNumberFactoryMethod(*this, SR.getBegin(),
                                             ET->getDecl()->getIntegerType());
     BoxedType = NSNumberPointer;
+  } else if (ValueType->isObjCBoxableRecordType()) {
+    // Support for structure types, that marked as objc_boxable
+    // struct __attribute__((objc_boxable)) s { ... };
+    
+    // Look up the NSValue class, if we haven't done so already. It's cached
+    // in the Sema instance.
+    if (!NSValueDecl) {
+      IdentifierInfo *NSValueId =
+        NSAPIObj->getNSClassId(NSAPI::ClassId_NSValue);
+      NamedDecl *IF = LookupSingleName(TUScope, NSValueId,
+                                       SR.getBegin(), Sema::LookupOrdinaryName);
+      NSValueDecl = dyn_cast_or_null<ObjCInterfaceDecl>(IF);
+      if (!NSValueDecl) {
+        if (getLangOpts().DebuggerObjCLiteral) {
+          // Create a stub definition of NSValue.
+          DeclContext *TU = Context.getTranslationUnitDecl();
+          NSValueDecl = ObjCInterfaceDecl::Create(Context, TU,
+                                                  SourceLocation(), NSValueId,
+                                                  nullptr, SourceLocation());
+        } else {
+          // Otherwise, require a declaration of NSValue.
+          Diag(SR.getBegin(), diag::err_undeclared_nsvalue);
+          return ExprError();
+        }
+      } else if (!NSValueDecl->hasDefinition()) {
+        Diag(SR.getBegin(), diag::err_undeclared_nsvalue);
+        return ExprError();
+      }
+      
+      // generate the pointer to NSValue type.
+      QualType NSValueObject = Context.getObjCInterfaceType(NSValueDecl);
+      NSValuePointer = Context.getObjCObjectPointerType(NSValueObject);
+    }
+    
+    if (!ValueWithBytesObjCTypeMethod) {
+      IdentifierInfo *II[] = {
+        &Context.Idents.get("valueWithBytes"),
+        &Context.Idents.get("objCType")
+      };
+      Selector ValueWithBytesObjCType = Context.Selectors.getSelector(2, II);
+      
+      // Look for the appropriate method within NSValue.
+      BoxingMethod = NSValueDecl->lookupClassMethod(ValueWithBytesObjCType);
+      if (!BoxingMethod && getLangOpts().DebuggerObjCLiteral) {
+        // Debugger needs to work even if NSValue hasn't been defined.
+        TypeSourceInfo *ReturnTInfo = nullptr;
+        ObjCMethodDecl *M = ObjCMethodDecl::Create(
+                                               Context,
+                                               SourceLocation(),
+                                               SourceLocation(),
+                                               ValueWithBytesObjCType,
+                                               NSValuePointer,
+                                               ReturnTInfo,
+                                               NSValueDecl,
+                                               /*isInstance=*/false,
+                                               /*isVariadic=*/false,
+                                               /*isPropertyAccessor=*/false,
+                                               /*isImplicitlyDeclared=*/true,
+                                               /*isDefined=*/false,
+                                               ObjCMethodDecl::Required,
+                                               /*HasRelatedResultType=*/false);
+        
+        SmallVector<ParmVarDecl *, 2> Params;
+        
+        ParmVarDecl *bytes =
+        ParmVarDecl::Create(Context, M,
+                            SourceLocation(), SourceLocation(),
+                            &Context.Idents.get("bytes"),
+                            Context.VoidPtrTy.withConst(),
+                            /*TInfo=*/nullptr,
+                            SC_None, nullptr);
+        Params.push_back(bytes);
+        
+        QualType ConstCharType = Context.CharTy.withConst();
+        ParmVarDecl *type =
+        ParmVarDecl::Create(Context, M,
+                            SourceLocation(), SourceLocation(),
+                            &Context.Idents.get("type"),
+                            Context.getPointerType(ConstCharType),
+                            /*TInfo=*/nullptr,
+                            SC_None, nullptr);
+        Params.push_back(type);
+        
+        M->setMethodParams(Context, Params, None);
+        BoxingMethod = M;
+      }
+      
+      if (!validateBoxingMethod(*this, SR.getBegin(), NSValueDecl,
+                                ValueWithBytesObjCType, BoxingMethod))
+        return ExprError();
+      
+      ValueWithBytesObjCTypeMethod = BoxingMethod;
+    }
+    
+    if (!ValueType.isTriviallyCopyableType(Context)) {
+      Diag(SR.getBegin(), 
+           diag::err_objc_non_trivially_copyable_boxed_expression_type)
+        << ValueType << ValueExpr->getSourceRange();
+      return ExprError();
+    }
+
+    BoxingMethod = ValueWithBytesObjCTypeMethod;
+    BoxedType = NSValuePointer;
   }
 
   if (!BoxingMethod) {
@@ -582,13 +684,22 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
     return ExprError();
   }
   
-  // Convert the expression to the type that the parameter requires.
-  ParmVarDecl *ParamDecl = BoxingMethod->parameters()[0];
-  InitializedEntity Entity = InitializedEntity::InitializeParameter(Context,
-                                                                    ParamDecl);
-  ExprResult ConvertedValueExpr = PerformCopyInitialization(Entity,
-                                                            SourceLocation(),
-                                                            ValueExpr);
+  DiagnoseUseOfDecl(BoxingMethod, SR.getBegin());
+
+  ExprResult ConvertedValueExpr;
+  if (ValueType->isObjCBoxableRecordType()) {
+    InitializedEntity IE = InitializedEntity::InitializeTemporary(ValueType);
+    ConvertedValueExpr = PerformCopyInitialization(IE, ValueExpr->getExprLoc(), 
+                                                   ValueExpr);
+  } else {
+    // Convert the expression to the type that the parameter requires.
+    ParmVarDecl *ParamDecl = BoxingMethod->parameters()[0];
+    InitializedEntity IE = InitializedEntity::InitializeParameter(Context,
+                                                                  ParamDecl);
+    ConvertedValueExpr = PerformCopyInitialization(IE, SourceLocation(),
+                                                   ValueExpr);
+  }
+  
   if (ConvertedValueExpr.isInvalid())
     return ExprError();
   ValueExpr = ConvertedValueExpr.get();
diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp
index 821d7f6..f0f7cb9 100644
--- a/lib/Sema/SemaInit.cpp
+++ b/lib/Sema/SemaInit.cpp
@@ -5088,10 +5088,8 @@ void InitializationSequence::InitializeFrom(Sema &S,
 }
 
 InitializationSequence::~InitializationSequence() {
-  for (SmallVectorImpl<Step>::iterator Step = Steps.begin(),
-                                          StepEnd = Steps.end();
-       Step != StepEnd; ++Step)
-    Step->Destroy();
+  for (auto &S : Steps)
+    S.Destroy();
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Sema/SemaObjCProperty.cpp b/lib/Sema/SemaObjCProperty.cpp
index 87fb5b6..0f88abc 100644
--- a/lib/Sema/SemaObjCProperty.cpp
+++ b/lib/Sema/SemaObjCProperty.cpp
@@ -1758,7 +1758,7 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl,
   }
 }
 
-void Sema::diagnoseNullResettableSynthesizedSetters(ObjCImplDecl *impDecl) {
+void Sema::diagnoseNullResettableSynthesizedSetters(const ObjCImplDecl *impDecl) {
   for (const auto *propertyImpl : impDecl->property_impls()) {
     const auto *property = propertyImpl->getPropertyDecl();
 
@@ -2025,7 +2025,7 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property,
     if (property->getPropertyAttributes() &
         ObjCPropertyDecl::OBJC_PR_null_resettable) {
       QualType modifiedTy = resultTy;
-      if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)){
+      if (auto nullability = AttributedType::stripOuterNullability(modifiedTy)) {
         if (*nullability == NullabilityKind::Unspecified)
           resultTy = Context.getAttributedType(AttributedType::attr_nonnull,
                                                modifiedTy, modifiedTy);
diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp
index e609fcf..867cb9f 100644
--- a/lib/Sema/SemaOpenMP.cpp
+++ b/lib/Sema/SemaOpenMP.cpp
@@ -95,19 +95,20 @@ private:
     Scope *CurScope;
     SourceLocation ConstructLoc;
     bool OrderedRegion;
+    bool NowaitRegion;
     unsigned CollapseNumber;
     SourceLocation InnerTeamsRegionLoc;
     SharingMapTy(OpenMPDirectiveKind DKind, DeclarationNameInfo Name,
                  Scope *CurScope, SourceLocation Loc)
         : SharingMap(), AlignedMap(), LCVSet(), DefaultAttr(DSA_unspecified),
           Directive(DKind), DirectiveName(std::move(Name)), CurScope(CurScope),
-          ConstructLoc(Loc), OrderedRegion(false), CollapseNumber(1),
-          InnerTeamsRegionLoc() {}
+          ConstructLoc(Loc), OrderedRegion(false), NowaitRegion(false),
+          CollapseNumber(1), InnerTeamsRegionLoc() {}
     SharingMapTy()
         : SharingMap(), AlignedMap(), LCVSet(), DefaultAttr(DSA_unspecified),
           Directive(OMPD_unknown), DirectiveName(), CurScope(nullptr),
-          ConstructLoc(), OrderedRegion(false), CollapseNumber(1),
-          InnerTeamsRegionLoc() {}
+          ConstructLoc(), OrderedRegion(false), NowaitRegion(false),
+          CollapseNumber(1), InnerTeamsRegionLoc() {}
   };
 
   typedef SmallVector<SharingMapTy, 64> StackTy;
@@ -116,7 +117,7 @@ private:
   StackTy Stack;
   /// \brief true, if check for DSA must be from parent directive, false, if
   /// from current directive.
-  bool FromParent;
+  OpenMPClauseKind ClauseKindMode;
   Sema &SemaRef;
 
   typedef SmallVector<SharingMapTy, 8>::reverse_iterator reverse_iterator;
@@ -127,10 +128,11 @@ private:
   bool isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter);
 
 public:
-  explicit DSAStackTy(Sema &S) : Stack(1), FromParent(false), SemaRef(S) {}
+  explicit DSAStackTy(Sema &S)
+      : Stack(1), ClauseKindMode(OMPC_unknown), SemaRef(S) {}
 
-  bool isFromParent() const { return FromParent; }
-  void setFromParent(bool Flag) { FromParent = Flag; }
+  bool isClauseParsingMode() const { return ClauseKindMode != OMPC_unknown; }
+  void setClauseParsingMode(OpenMPClauseKind K) { ClauseKindMode = K; }
 
   void push(OpenMPDirectiveKind DKind, const DeclarationNameInfo &DirName,
             Scope *CurScope, SourceLocation Loc) {
@@ -175,6 +177,12 @@ public:
   DSAVarData hasInnermostDSA(VarDecl *D, ClausesPredicate CPred,
                              DirectivesPredicate DPred,
                              bool FromParent);
+  /// \brief Checks if the specified variables has explicit data-sharing
+  /// attributes which match specified \a CPred predicate at the specified
+  /// OpenMP region.
+  bool hasExplicitDSA(VarDecl *D,
+                      const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
+                      unsigned Level);
   /// \brief Finds a directive which matches specified \a DPred predicate.
   template <class NamedDirectivesPredicate>
   bool hasDirective(NamedDirectivesPredicate DPred, bool FromParent);
@@ -225,6 +233,17 @@ public:
       return Stack[Stack.size() - 2].OrderedRegion;
     return false;
   }
+  /// \brief Marks current region as nowait (it has a 'nowait' clause).
+  void setNowaitRegion(bool IsNowait = true) {
+    Stack.back().NowaitRegion = IsNowait;
+  }
+  /// \brief Returns true, if parent region is nowait (has associated
+  /// 'nowait' clause), false - otherwise.
+  bool isParentNowaitRegion() const {
+    if (Stack.size() > 2)
+      return Stack[Stack.size() - 2].NowaitRegion;
+    return false;
+  }
 
   /// \brief Set collapse value for the region.
   void setCollapseNumber(unsigned Val) { Stack.back().CollapseNumber = Val; }
@@ -589,6 +608,23 @@ DSAStackTy::hasInnermostDSA(VarDecl *D, ClausesPredicate CPred,
   return DSAVarData();
 }
 
+bool DSAStackTy::hasExplicitDSA(
+    VarDecl *D, const llvm::function_ref<bool(OpenMPClauseKind)> &CPred,
+    unsigned Level) {
+  if (CPred(ClauseKindMode))
+    return true;
+  if (isClauseParsingMode())
+    ++Level;
+  D = D->getCanonicalDecl();
+  auto StartI = Stack.rbegin();
+  auto EndI = std::prev(Stack.rend());
+  if (std::distance(StartI, EndI) <= (int)Level)
+    return false;
+  std::advance(StartI, Level);
+  return (StartI->SharingMap.count(D) > 0) && StartI->SharingMap[D].RefExpr &&
+         CPred(StartI->SharingMap[D].Attributes);
+}
+
 template <class NamedDirectivesPredicate>
 bool DSAStackTy::hasDirective(NamedDirectivesPredicate DPred, bool FromParent) {
   auto StartI = std::next(Stack.rbegin());
@@ -617,16 +653,22 @@ bool Sema::IsOpenMPCapturedVar(VarDecl *VD) {
         (VD->hasLocalStorage() &&
          isParallelOrTaskRegion(DSAStack->getCurrentDirective())))
       return true;
-    auto DVarPrivate = DSAStack->getTopDSA(VD, DSAStack->isFromParent());
+    auto DVarPrivate = DSAStack->getTopDSA(VD, DSAStack->isClauseParsingMode());
     if (DVarPrivate.CKind != OMPC_unknown && isOpenMPPrivate(DVarPrivate.CKind))
       return true;
     DVarPrivate = DSAStack->hasDSA(VD, isOpenMPPrivate, MatchesAlways(),
-                                   DSAStack->isFromParent());
+                                   DSAStack->isClauseParsingMode());
     return DVarPrivate.CKind != OMPC_unknown;
   }
   return false;
 }
 
+bool Sema::isOpenMPPrivateVar(VarDecl *VD, unsigned Level) {
+  assert(LangOpts.OpenMP && "OpenMP is not allowed");
+  return DSAStack->hasExplicitDSA(
+      VD, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
+}
+
 void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
 
 void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind,
@@ -636,12 +678,12 @@ void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind,
   PushExpressionEvaluationContext(PotentiallyEvaluated);
 }
 
-void Sema::StartOpenMPClauses() {
-  DSAStack->setFromParent(/*Flag=*/true);
+void Sema::StartOpenMPClause(OpenMPClauseKind K) {
+  DSAStack->setClauseParsingMode(K);
 }
 
-void Sema::EndOpenMPClauses() {
-  DSAStack->setFromParent(/*Flag=*/false);
+void Sema::EndOpenMPClause() {
+  DSAStack->setClauseParsingMode(/*K=*/OMPC_unknown);
 }
 
 void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
@@ -1282,6 +1324,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
   case OMPD_taskyield:
   case OMPD_barrier:
   case OMPD_taskwait:
+  case OMPD_cancellation_point:
+  case OMPD_cancel:
   case OMPD_flush:
     llvm_unreachable("OpenMP Directive is not allowed");
   case OMPD_unknown:
@@ -1322,6 +1366,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
 static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
                                   OpenMPDirectiveKind CurrentRegion,
                                   const DeclarationNameInfo &CurrentName,
+                                  OpenMPDirectiveKind CancelRegion,
                                   SourceLocation StartLoc) {
   // Allowed nesting of constructs
   // +------------------+-----------------+------------------------------------+
@@ -1349,6 +1394,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | parallel         | atomic          | *                                  |
   // | parallel         | target          | *                                  |
   // | parallel         | teams           | +                                  |
+  // | parallel         | cancellation    |                                    |
+  // |                  | point           | !                                  |
+  // | parallel         | cancel          | !                                  |
   // +------------------+-----------------+------------------------------------+
   // | for              | parallel        | *                                  |
   // | for              | for             | +                                  |
@@ -1372,6 +1420,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | for              | atomic          | *                                  |
   // | for              | target          | *                                  |
   // | for              | teams           | +                                  |
+  // | for              | cancellation    |                                    |
+  // |                  | point           | !                                  |
+  // | for              | cancel          | !                                  |
   // +------------------+-----------------+------------------------------------+
   // | master           | parallel        | *                                  |
   // | master           | for             | +                                  |
@@ -1395,6 +1446,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | master           | atomic          | *                                  |
   // | master           | target          | *                                  |
   // | master           | teams           | +                                  |
+  // | master           | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | master           | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | critical         | parallel        | *                                  |
   // | critical         | for             | +                                  |
@@ -1417,6 +1471,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | critical         | atomic          | *                                  |
   // | critical         | target          | *                                  |
   // | critical         | teams           | +                                  |
+  // | critical         | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | critical         | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | simd             | parallel        |                                    |
   // | simd             | for             |                                    |
@@ -1440,6 +1497,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | simd             | atomic          |                                    |
   // | simd             | target          |                                    |
   // | simd             | teams           |                                    |
+  // | simd             | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | simd             | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | for simd         | parallel        |                                    |
   // | for simd         | for             |                                    |
@@ -1463,6 +1523,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | for simd         | atomic          |                                    |
   // | for simd         | target          |                                    |
   // | for simd         | teams           |                                    |
+  // | for simd         | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | for simd         | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | parallel for simd| parallel        |                                    |
   // | parallel for simd| for             |                                    |
@@ -1486,6 +1549,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | parallel for simd| atomic          |                                    |
   // | parallel for simd| target          |                                    |
   // | parallel for simd| teams           |                                    |
+  // | parallel for simd| cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | parallel for simd| cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | sections         | parallel        | *                                  |
   // | sections         | for             | +                                  |
@@ -1509,6 +1575,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | sections         | atomic          | *                                  |
   // | sections         | target          | *                                  |
   // | sections         | teams           | +                                  |
+  // | sections         | cancellation    |                                    |
+  // |                  | point           | !                                  |
+  // | sections         | cancel          | !                                  |
   // +------------------+-----------------+------------------------------------+
   // | section          | parallel        | *                                  |
   // | section          | for             | +                                  |
@@ -1532,6 +1601,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | section          | atomic          | *                                  |
   // | section          | target          | *                                  |
   // | section          | teams           | +                                  |
+  // | section          | cancellation    |                                    |
+  // |                  | point           | !                                  |
+  // | section          | cancel          | !                                  |
   // +------------------+-----------------+------------------------------------+
   // | single           | parallel        | *                                  |
   // | single           | for             | +                                  |
@@ -1555,6 +1627,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | single           | atomic          | *                                  |
   // | single           | target          | *                                  |
   // | single           | teams           | +                                  |
+  // | single           | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | single           | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | parallel for     | parallel        | *                                  |
   // | parallel for     | for             | +                                  |
@@ -1578,6 +1653,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | parallel for     | atomic          | *                                  |
   // | parallel for     | target          | *                                  |
   // | parallel for     | teams           | +                                  |
+  // | parallel for     | cancellation    |                                    |
+  // |                  | point           | !                                  |
+  // | parallel for     | cancel          | !                                  |
   // +------------------+-----------------+------------------------------------+
   // | parallel sections| parallel        | *                                  |
   // | parallel sections| for             | +                                  |
@@ -1601,6 +1679,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | parallel sections| atomic          | *                                  |
   // | parallel sections| target          | *                                  |
   // | parallel sections| teams           | +                                  |
+  // | parallel sections| cancellation    |                                    |
+  // |                  | point           | !                                  |
+  // | parallel sections| cancel          | !                                  |
   // +------------------+-----------------+------------------------------------+
   // | task             | parallel        | *                                  |
   // | task             | for             | +                                  |
@@ -1624,6 +1705,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | task             | atomic          | *                                  |
   // | task             | target          | *                                  |
   // | task             | teams           | +                                  |
+  // | task             | cancellation    |                                    |
+  // |                  | point           | !                                  |
+  // | task             | cancel          | !                                  |
   // +------------------+-----------------+------------------------------------+
   // | ordered          | parallel        | *                                  |
   // | ordered          | for             | +                                  |
@@ -1647,6 +1731,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | ordered          | atomic          | *                                  |
   // | ordered          | target          | *                                  |
   // | ordered          | teams           | +                                  |
+  // | ordered          | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | ordered          | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | atomic           | parallel        |                                    |
   // | atomic           | for             |                                    |
@@ -1670,6 +1757,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | atomic           | atomic          |                                    |
   // | atomic           | target          |                                    |
   // | atomic           | teams           |                                    |
+  // | atomic           | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | atomic           | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | target           | parallel        | *                                  |
   // | target           | for             | *                                  |
@@ -1693,6 +1783,9 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | target           | atomic          | *                                  |
   // | target           | target          | *                                  |
   // | target           | teams           | *                                  |
+  // | target           | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | target           | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   // | teams            | parallel        | *                                  |
   // | teams            | for             | +                                  |
@@ -1710,12 +1803,15 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   // | teams            | taskyield       | +                                  |
   // | teams            | barrier         | +                                  |
   // | teams            | taskwait        | +                                  |
-  // | teams            | taskgroup        | +                                  |
+  // | teams            | taskgroup       | +                                  |
   // | teams            | flush           | +                                  |
   // | teams            | ordered         | +                                  |
   // | teams            | atomic          | +                                  |
   // | teams            | target          | +                                  |
   // | teams            | teams           | +                                  |
+  // | teams            | cancellation    |                                    |
+  // |                  | point           |                                    |
+  // | teams            | cancel          |                                    |
   // +------------------+-----------------+------------------------------------+
   if (Stack->getCurScope()) {
     auto ParentRegion = Stack->getParentDirective();
@@ -1757,7 +1853,26 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
     // called from OpenMP regions with the required preconditions).
     if (ParentRegion == OMPD_unknown)
       return false;
-    if (CurrentRegion == OMPD_master) {
+    if (CurrentRegion == OMPD_cancellation_point ||
+        CurrentRegion == OMPD_cancel) {
+      // OpenMP [2.16, Nesting of Regions]
+      // A cancellation point construct for which construct-type-clause is
+      // taskgroup must be nested inside a task construct. A cancellation
+      // point construct for which construct-type-clause is not taskgroup must
+      // be closely nested inside an OpenMP construct that matches the type
+      // specified in construct-type-clause.
+      // A cancel construct for which construct-type-clause is taskgroup must be
+      // nested inside a task construct. A cancel construct for which
+      // construct-type-clause is not taskgroup must be closely nested inside an
+      // OpenMP construct that matches the type specified in
+      // construct-type-clause.
+      NestingProhibited =
+          !((CancelRegion == OMPD_parallel && ParentRegion == OMPD_parallel) ||
+            (CancelRegion == OMPD_for && ParentRegion == OMPD_for) ||
+            (CancelRegion == OMPD_taskgroup && ParentRegion == OMPD_task) ||
+            (CancelRegion == OMPD_sections &&
+             (ParentRegion == OMPD_section || ParentRegion == OMPD_sections)));
+    } else if (CurrentRegion == OMPD_master) {
       // OpenMP [2.16, Nesting of Regions]
       // A master region may not be closely nested inside a worksharing,
       // atomic, or explicit task region.
@@ -1847,14 +1962,13 @@ static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
   return false;
 }
 
-StmtResult Sema::ActOnOpenMPExecutableDirective(OpenMPDirectiveKind Kind,
-                                                const DeclarationNameInfo &DirName,
-                                                ArrayRef<OMPClause *> Clauses,
-                                                Stmt *AStmt,
-                                                SourceLocation StartLoc,
-                                                SourceLocation EndLoc) {
+StmtResult Sema::ActOnOpenMPExecutableDirective(
+    OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName,
+    OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
+    Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) {
   StmtResult Res = StmtError();
-  if (CheckNestingOfRegions(*this, DSAStack, Kind, DirName, StartLoc))
+  if (CheckNestingOfRegions(*this, DSAStack, Kind, DirName, CancelRegion,
+                            StartLoc))
     return StmtError();
 
   llvm::SmallVector<OMPClause *, 8> ClausesWithImplicit;
@@ -1988,6 +2102,20 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(OpenMPDirectiveKind Kind,
     Res = ActOnOpenMPTargetDirective(ClausesWithImplicit, AStmt, StartLoc,
                                      EndLoc);
     break;
+  case OMPD_cancellation_point:
+    assert(ClausesWithImplicit.empty() &&
+           "No clauses are allowed for 'omp cancellation point' directive");
+    assert(AStmt == nullptr && "No associated statement allowed for 'omp "
+                               "cancellation point' directive");
+    Res = ActOnOpenMPCancellationPointDirective(StartLoc, EndLoc, CancelRegion);
+    break;
+  case OMPD_cancel:
+    assert(ClausesWithImplicit.empty() &&
+           "No clauses are allowed for 'omp cancel' directive");
+    assert(AStmt == nullptr &&
+           "No associated statement allowed for 'omp cancel' directive");
+    Res = ActOnOpenMPCancelDirective(StartLoc, EndLoc, CancelRegion);
+    break;
   case OMPD_threadprivate:
     llvm_unreachable("OpenMP Directive is not allowed");
   case OMPD_unknown:
@@ -3216,8 +3344,7 @@ StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef<OMPClause *> Clauses,
       return StmtError();
     // All associated statements must be '#pragma omp section' except for
     // the first one.
-    for (++S; S; ++S) {
-      auto SectionStmt = *S;
+    for (Stmt *SectionStmt : ++S) {
       if (!SectionStmt || !isa<OMPSectionDirective>(SectionStmt)) {
         if (SectionStmt)
           Diag(SectionStmt->getLocStart(),
@@ -3375,8 +3502,7 @@ Sema::ActOnOpenMPParallelSectionsDirective(ArrayRef<OMPClause *> Clauses,
       return StmtError();
     // All associated statements must be '#pragma omp section' except for
     // the first one.
-    for (++S; S; ++S) {
-      auto SectionStmt = *S;
+    for (Stmt *SectionStmt : ++S) {
       if (!SectionStmt || !isa<OMPSectionDirective>(SectionStmt)) {
         if (SectionStmt)
           Diag(SectionStmt->getLocStart(),
@@ -4174,6 +4300,48 @@ StmtResult Sema::ActOnOpenMPTeamsDirective(ArrayRef<OMPClause *> Clauses,
   return OMPTeamsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt);
 }
 
+StmtResult
+Sema::ActOnOpenMPCancellationPointDirective(SourceLocation StartLoc,
+                                            SourceLocation EndLoc,
+                                            OpenMPDirectiveKind CancelRegion) {
+  if (CancelRegion != OMPD_parallel && CancelRegion != OMPD_for &&
+      CancelRegion != OMPD_sections && CancelRegion != OMPD_taskgroup) {
+    Diag(StartLoc, diag::err_omp_wrong_cancel_region)
+        << getOpenMPDirectiveName(CancelRegion);
+    return StmtError();
+  }
+  if (DSAStack->isParentNowaitRegion()) {
+    Diag(StartLoc, diag::err_omp_parent_cancel_region_nowait) << 0;
+    return StmtError();
+  }
+  if (DSAStack->isParentOrderedRegion()) {
+    Diag(StartLoc, diag::err_omp_parent_cancel_region_ordered) << 0;
+    return StmtError();
+  }
+  return OMPCancellationPointDirective::Create(Context, StartLoc, EndLoc,
+                                               CancelRegion);
+}
+
+StmtResult Sema::ActOnOpenMPCancelDirective(SourceLocation StartLoc,
+                                            SourceLocation EndLoc,
+                                            OpenMPDirectiveKind CancelRegion) {
+  if (CancelRegion != OMPD_parallel && CancelRegion != OMPD_for &&
+      CancelRegion != OMPD_sections && CancelRegion != OMPD_taskgroup) {
+    Diag(StartLoc, diag::err_omp_wrong_cancel_region)
+        << getOpenMPDirectiveName(CancelRegion);
+    return StmtError();
+  }
+  if (DSAStack->isParentNowaitRegion()) {
+    Diag(StartLoc, diag::err_omp_parent_cancel_region_nowait) << 1;
+    return StmtError();
+  }
+  if (DSAStack->isParentOrderedRegion()) {
+    Diag(StartLoc, diag::err_omp_parent_cancel_region_ordered) << 1;
+    return StmtError();
+  }
+  return OMPCancelDirective::Create(Context, StartLoc, EndLoc, CancelRegion);
+}
+
 OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
                                              SourceLocation StartLoc,
                                              SourceLocation LParenLoc,
@@ -4218,6 +4386,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
   case OMPC_update:
   case OMPC_capture:
   case OMPC_seq_cst:
+  case OMPC_depend:
   case OMPC_unknown:
     llvm_unreachable("Clause is not allowed.");
   }
@@ -4432,6 +4601,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause(
   case OMPC_update:
   case OMPC_capture:
   case OMPC_seq_cst:
+  case OMPC_depend:
   case OMPC_unknown:
     llvm_unreachable("Clause is not allowed.");
   }
@@ -4552,6 +4722,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
   case OMPC_update:
   case OMPC_capture:
   case OMPC_seq_cst:
+  case OMPC_depend:
   case OMPC_unknown:
     llvm_unreachable("Clause is not allowed.");
   }
@@ -4674,6 +4845,7 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
   case OMPC_proc_bind:
   case OMPC_threadprivate:
   case OMPC_flush:
+  case OMPC_depend:
   case OMPC_unknown:
     llvm_unreachable("Clause is not allowed.");
   }
@@ -4688,6 +4860,7 @@ OMPClause *Sema::ActOnOpenMPOrderedClause(SourceLocation StartLoc,
 
 OMPClause *Sema::ActOnOpenMPNowaitClause(SourceLocation StartLoc,
                                          SourceLocation EndLoc) {
+  DSAStack->setNowaitRegion();
   return new (Context) OMPNowaitClause(StartLoc, EndLoc);
 }
 
@@ -4730,7 +4903,8 @@ OMPClause *Sema::ActOnOpenMPVarListClause(
     OpenMPClauseKind Kind, ArrayRef<Expr *> VarList, Expr *TailExpr,
     SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation ColonLoc,
     SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec,
-    const DeclarationNameInfo &ReductionId) {
+    const DeclarationNameInfo &ReductionId, OpenMPDependClauseKind DepKind,
+    SourceLocation DepLoc) {
   OMPClause *Res = nullptr;
   switch (Kind) {
   case OMPC_private:
@@ -4766,6 +4940,10 @@ OMPClause *Sema::ActOnOpenMPVarListClause(
   case OMPC_flush:
     Res = ActOnOpenMPFlushClause(VarList, StartLoc, LParenLoc, EndLoc);
     break;
+  case OMPC_depend:
+    Res = ActOnOpenMPDependClause(DepKind, DepLoc, ColonLoc, VarList, StartLoc,
+                                  LParenLoc, EndLoc);
+    break;
   case OMPC_if:
   case OMPC_final:
   case OMPC_num_threads:
@@ -6316,3 +6494,66 @@ OMPClause *Sema::ActOnOpenMPFlushClause(ArrayRef<Expr *> VarList,
   return OMPFlushClause::Create(Context, StartLoc, LParenLoc, EndLoc, VarList);
 }
 
+OMPClause *
+Sema::ActOnOpenMPDependClause(OpenMPDependClauseKind DepKind,
+                              SourceLocation DepLoc, SourceLocation ColonLoc,
+                              ArrayRef<Expr *> VarList, SourceLocation StartLoc,
+                              SourceLocation LParenLoc, SourceLocation EndLoc) {
+  if (DepKind == OMPC_DEPEND_unknown) {
+    std::string Values;
+    std::string Sep(", ");
+    for (unsigned i = 0; i < OMPC_DEPEND_unknown; ++i) {
+      Values += "'";
+      Values += getOpenMPSimpleClauseTypeName(OMPC_depend, i);
+      Values += "'";
+      switch (i) {
+      case OMPC_DEPEND_unknown - 2:
+        Values += " or ";
+        break;
+      case OMPC_DEPEND_unknown - 1:
+        break;
+      default:
+        Values += Sep;
+        break;
+      }
+    }
+    Diag(DepLoc, diag::err_omp_unexpected_clause_value)
+        << Values << getOpenMPClauseName(OMPC_depend);
+    return nullptr;
+  }
+  SmallVector<Expr *, 8> Vars;
+  for (auto &RefExpr : VarList) {
+    assert(RefExpr && "NULL expr in OpenMP shared clause.");
+    if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
+      // It will be analyzed later.
+      Vars.push_back(RefExpr);
+      continue;
+    }
+
+    SourceLocation ELoc = RefExpr->getExprLoc();
+    // OpenMP  [2.11.1.1, Restrictions, p.3]
+    //  A variable that is part of another variable (such as a field of a
+    //  structure) but is not an array element or an array section cannot appear
+    //  in a depend clause.
+    auto *SimpleExpr = RefExpr->IgnoreParenCasts();
+    DeclRefExpr *DE = dyn_cast<DeclRefExpr>(SimpleExpr);
+    ArraySubscriptExpr *ASE = dyn_cast<ArraySubscriptExpr>(SimpleExpr);
+    if (!RefExpr->IgnoreParenImpCasts()->isLValue() || (!ASE && !DE) ||
+        (DE && !isa<VarDecl>(DE->getDecl())) ||
+        (ASE && !ASE->getBase()->getType()->isAnyPointerType() &&
+         !ASE->getBase()->getType()->isArrayType())) {
+      Diag(ELoc, diag::err_omp_expected_var_name_or_array_item)
+          << RefExpr->getSourceRange();
+      continue;
+    }
+
+    Vars.push_back(RefExpr->IgnoreParenImpCasts());
+  }
+
+  if (Vars.empty())
+    return nullptr;
+
+  return OMPDependClause::Create(Context, StartLoc, LParenLoc, EndLoc, DepKind,
+                                 DepLoc, ColonLoc, Vars);
+}
+
diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp
index 50e4345..6fca974 100644
--- a/lib/Sema/SemaStmt.cpp
+++ b/lib/Sema/SemaStmt.cpp
@@ -495,6 +495,7 @@ Sema::ActOnIfStmt(SourceLocation IfLoc, FullExprArg CondVal, Decl *CondVar,
   if (CondVar) {
     ConditionVar = cast<VarDecl>(CondVar);
     CondResult = CheckConditionVariable(ConditionVar, IfLoc, true);
+    CondResult = ActOnFinishFullExpr(CondResult.get(), IfLoc);
     if (CondResult.isInvalid())
       return StmtError();
   }
@@ -649,12 +650,10 @@ Sema::ActOnStartOfSwitchStmt(SourceLocation SwitchLoc, Expr *Cond,
   if (CondResult.isInvalid()) return StmtError();
   Cond = CondResult.get();
 
-  if (!CondVar) {
-    CondResult = ActOnFinishFullExpr(Cond, SwitchLoc);
-    if (CondResult.isInvalid())
-      return StmtError();
-    Cond = CondResult.get();
-  }
+  CondResult = ActOnFinishFullExpr(Cond, SwitchLoc);
+  if (CondResult.isInvalid())
+    return StmtError();
+  Cond = CondResult.get();
 
   getCurFunction()->setHasBranchIntoScope();
 
@@ -1229,6 +1228,7 @@ Sema::ActOnWhileStmt(SourceLocation WhileLoc, FullExprArg Cond,
   if (CondVar) {
     ConditionVar = cast<VarDecl>(CondVar);
     CondResult = CheckConditionVariable(ConditionVar, WhileLoc, true);
+    CondResult = ActOnFinishFullExpr(CondResult.get(), WhileLoc);
     if (CondResult.isInvalid())
       return StmtError();
   }
@@ -1634,6 +1634,7 @@ Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
   if (secondVar) {
     ConditionVar = cast<VarDecl>(secondVar);
     SecondResult = CheckConditionVariable(ConditionVar, ForLoc, true);
+    SecondResult = ActOnFinishFullExpr(SecondResult.get(), ForLoc);
     if (SecondResult.isInvalid())
       return StmtError();
   }
diff --git a/lib/Sema/SemaTemplateDeduction.cpp b/lib/Sema/SemaTemplateDeduction.cpp
index 6f676ad..ae8157e 100644
--- a/lib/Sema/SemaTemplateDeduction.cpp
+++ b/lib/Sema/SemaTemplateDeduction.cpp
@@ -3190,6 +3190,40 @@ static bool
 hasDeducibleTemplateParameters(Sema &S, FunctionTemplateDecl *FunctionTemplate,
                                QualType T);
 
+static Sema::TemplateDeductionResult DeduceTemplateArgumentByListElement(
+    Sema &S, TemplateParameterList *TemplateParams, QualType ParamType,
+    Expr *Arg, TemplateDeductionInfo &Info,
+    SmallVectorImpl<DeducedTemplateArgument> &Deduced, unsigned TDF);
+
+/// \brief Attempt template argument deduction from an initializer list
+///        deemed to be an argument in a function call.
+static bool
+DeduceFromInitializerList(Sema &S, TemplateParameterList *TemplateParams,
+                          QualType AdjustedParamType, InitListExpr *ILE,
+                          TemplateDeductionInfo &Info,
+                          SmallVectorImpl<DeducedTemplateArgument> &Deduced,
+                          unsigned TDF, Sema::TemplateDeductionResult &Result) {
+  // If the argument is an initializer list then the parameter is an undeduced
+  // context, unless the parameter type is (reference to cv)
+  // std::initializer_list<P'>, in which case deduction is done for each element
+  // of the initializer list as-if it were an argument in a function call, and
+  // the result is the deduced type if it's the same for all elements.
+  QualType X;
+  if (!S.isStdInitializerList(AdjustedParamType, &X))
+    return false;
+
+  Result = Sema::TDK_Success;
+
+  // Recurse down into the init list.
+  for (unsigned i = 0, e = ILE->getNumInits(); i < e; ++i) {
+    if ((Result = DeduceTemplateArgumentByListElement(
+             S, TemplateParams, X, ILE->getInit(i), Info, Deduced, TDF)))
+      return true;
+  }
+
+  return true;
+}
+
 /// \brief Perform template argument deduction by matching a parameter type
 ///        against a single expression, where the expression is an element of
 ///        an initializer list that was originally matched against a parameter
@@ -3204,19 +3238,13 @@ DeduceTemplateArgumentByListElement(Sema &S,
   // Handle the case where an init list contains another init list as the
   // element.
   if (InitListExpr *ILE = dyn_cast<InitListExpr>(Arg)) {
-    QualType X;
-    if (!S.isStdInitializerList(ParamType.getNonReferenceType(), &X))
+    Sema::TemplateDeductionResult Result;
+    if (!DeduceFromInitializerList(S, TemplateParams,
+                                   ParamType.getNonReferenceType(), ILE, Info,
+                                   Deduced, TDF, Result))
       return Sema::TDK_Success; // Just ignore this expression.
 
-    // Recurse down into the init list.
-    for (unsigned i = 0, e = ILE->getNumInits(); i < e; ++i) {
-      if (Sema::TemplateDeductionResult Result =
-            DeduceTemplateArgumentByListElement(S, TemplateParams, X,
-                                                 ILE->getInit(i),
-                                                 Info, Deduced, TDF))
-        return Result;
-    }
-    return Sema::TDK_Success;
+    return Result;
   }
 
   // For all other cases, just match by type.
@@ -3335,22 +3363,14 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(
 
       // If the argument is an initializer list ...
       if (InitListExpr *ILE = dyn_cast<InitListExpr>(Arg)) {
-        // ... then the parameter is an undeduced context, unless the parameter
-        // type is (reference to cv) std::initializer_list<P'>, in which case
-        // deduction is done for each element of the initializer list, and the
-        // result is the deduced type if it's the same for all elements.
-        QualType X;
+        TemplateDeductionResult Result;
         // Removing references was already done.
-        if (!isStdInitializerList(ParamType, &X))
+        if (!DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE,
+                                       Info, Deduced, TDF, Result))
           continue;
 
-        for (unsigned i = 0, e = ILE->getNumInits(); i < e; ++i) {
-          if (TemplateDeductionResult Result =
-                DeduceTemplateArgumentByListElement(*this, TemplateParams, X,
-                                                     ILE->getInit(i),
-                                                     Info, Deduced, TDF))
-            return Result;
-        }
+        if (Result)
+          return Result;
         // Don't track the argument type, since an initializer list has none.
         continue;
       }
@@ -3406,19 +3426,15 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(
 
       // As above, initializer lists need special handling.
       if (InitListExpr *ILE = dyn_cast<InitListExpr>(Arg)) {
-        QualType X;
-        if (!isStdInitializerList(ParamType, &X)) {
+        TemplateDeductionResult Result;
+        if (!DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE,
+                                       Info, Deduced, TDF, Result)) {
           ++ArgIdx;
           break;
         }
 
-        for (unsigned i = 0, e = ILE->getNumInits(); i < e; ++i) {
-          if (TemplateDeductionResult Result =
-                DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, X,
-                                                   ILE->getInit(i)->getType(),
-                                                   Info, Deduced, TDF))
-            return Result;
-        }
+        if (Result)
+          return Result;
       } else {
 
         // Keep track of the argument type and corresponding argument index,
@@ -3952,6 +3968,8 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result) {
       }
 
       QualType Deduced = BuildDecltypeType(Init, Init->getLocStart(), false);
+      if (Deduced.isNull())
+        return DAR_FailedAlreadyDiagnosed;
       // FIXME: Support a non-canonical deduced type for 'auto'.
       Deduced = Context.getCanonicalType(Deduced);
       Result = SubstituteAutoTransform(*this, Deduced).Apply(Type);
diff --git a/lib/Sema/SemaTemplateInstantiate.cpp b/lib/Sema/SemaTemplateInstantiate.cpp
index 82ff7c0..7d58017 100644
--- a/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1680,11 +1680,24 @@ ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
   } else if (OldParm->hasUnparsedDefaultArg()) {
     NewParm->setUnparsedDefaultArg();
     UnparsedDefaultArgInstantiations[OldParm].push_back(NewParm);
-  } else if (Expr *Arg = OldParm->getDefaultArg())
-    // FIXME: if we non-lazily instantiated non-dependent default args for
-    // non-dependent parameter types we could remove a bunch of duplicate
-    // conversion warnings for such arguments.
-    NewParm->setUninstantiatedDefaultArg(Arg);
+  } else if (Expr *Arg = OldParm->getDefaultArg()) {
+    FunctionDecl *OwningFunc = cast<FunctionDecl>(OldParm->getDeclContext());
+    CXXRecordDecl *ClassD = dyn_cast<CXXRecordDecl>(OwningFunc->getDeclContext());
+    if (ClassD && ClassD->isLocalClass() && !ClassD->isLambda()) {
+      // If this is a method of a local class, as per DR1484 its default
+      // arguments must be instantiated.
+      Sema::ContextRAII SavedContext(*this, ClassD);
+      LocalInstantiationScope Local(*this);
+      ExprResult NewArg = SubstExpr(Arg, TemplateArgs);
+      if (NewArg.isUsable())
+        NewParm->setDefaultArg(NewArg.get());
+    } else {
+      // FIXME: if we non-lazily instantiated non-dependent default args for
+      // non-dependent parameter types we could remove a bunch of duplicate
+      // conversion warnings for such arguments.
+      NewParm->setUninstantiatedDefaultArg(Arg);
+    }
+  }
 
   NewParm->setHasInheritedDefaultArg(OldParm->hasInheritedDefaultArg());
   
@@ -2269,33 +2282,6 @@ bool Sema::InstantiateClassTemplateSpecialization(
   // Perform the actual instantiation on the canonical declaration.
   ClassTemplateSpec = cast<ClassTemplateSpecializationDecl>(
                                          ClassTemplateSpec->getCanonicalDecl());
-
-  // Check whether we have already instantiated or specialized this class
-  // template specialization.
-  if (ClassTemplateSpec->getSpecializationKind() != TSK_Undeclared) {
-    if (ClassTemplateSpec->getSpecializationKind() == 
-          TSK_ExplicitInstantiationDeclaration &&
-        TSK == TSK_ExplicitInstantiationDefinition) {
-      // An explicit instantiation definition follows an explicit instantiation
-      // declaration (C++0x [temp.explicit]p10); go ahead and perform the
-      // explicit instantiation.
-      ClassTemplateSpec->setSpecializationKind(TSK);
-      
-      // If this is an explicit instantiation definition, mark the
-      // vtable as used.
-      if (TSK == TSK_ExplicitInstantiationDefinition &&
-          !ClassTemplateSpec->isInvalidDecl())
-        MarkVTableUsed(PointOfInstantiation, ClassTemplateSpec, true);
-
-      return false;
-    }
-    
-    // We can only instantiate something that hasn't already been
-    // instantiated or specialized. Fail without any diagnostics: our
-    // caller will provide an error message.    
-    return true;
-  }
-
   if (ClassTemplateSpec->isInvalidDecl())
     return true;
   
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index f35d1aa..aff2d1c 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -2094,14 +2094,13 @@ static void collectUnexpandedParameterPacks(
     Sema &S,
     TemplateParameterList *Params,
     SmallVectorImpl<UnexpandedParameterPack> &Unexpanded) {
-  for (TemplateParameterList::const_iterator I = Params->begin(),
-                                             E = Params->end(); I != E; ++I) {
-    if ((*I)->isTemplateParameterPack())
+  for (const auto &P : *Params) {
+    if (P->isTemplateParameterPack())
       continue;
-    if (NonTypeTemplateParmDecl *NTTP = dyn_cast<NonTypeTemplateParmDecl>(*I))
+    if (NonTypeTemplateParmDecl *NTTP = dyn_cast<NonTypeTemplateParmDecl>(P))
       S.collectUnexpandedParameterPacks(NTTP->getTypeSourceInfo()->getTypeLoc(),
                                         Unexpanded);
-    if (TemplateTemplateParmDecl *TTP = dyn_cast<TemplateTemplateParmDecl>(*I))
+    if (TemplateTemplateParmDecl *TTP = dyn_cast<TemplateTemplateParmDecl>(P))
       collectUnexpandedParameterPacks(S, TTP->getTemplateParameters(),
                                       Unexpanded);
   }
@@ -2731,9 +2730,8 @@ TemplateDeclInstantiator::SubstTemplateParams(TemplateParameterList *L) {
   typedef SmallVector<NamedDecl *, 8> ParamVector;
   ParamVector Params;
   Params.reserve(N);
-  for (TemplateParameterList::iterator PI = L->begin(), PE = L->end();
-       PI != PE; ++PI) {
-    NamedDecl *D = cast_or_null<NamedDecl>(Visit(*PI));
+  for (auto &P : *L) {
+    NamedDecl *D = cast_or_null<NamedDecl>(Visit(P));
     Params.push_back(D);
     Invalid = Invalid || !D || D->isInvalidDecl();
   }
@@ -3246,10 +3244,18 @@ TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New,
 
     // DR1330: In C++11, defer instantiation of a non-trivial
     // exception specification.
+    // DR1484: Local classes and their members are instantiated along with the
+    // containing function.
+    bool RequireInstantiation = false;
+    if (CXXRecordDecl *Cls = dyn_cast<CXXRecordDecl>(Tmpl->getDeclContext())) {
+      if (Cls->isLocalClass())
+        RequireInstantiation = true;
+    }
     if (SemaRef.getLangOpts().CPlusPlus11 &&
         EPI.ExceptionSpec.Type != EST_None &&
         EPI.ExceptionSpec.Type != EST_DynamicNone &&
-        EPI.ExceptionSpec.Type != EST_BasicNoexcept) {
+        EPI.ExceptionSpec.Type != EST_BasicNoexcept &&
+        !RequireInstantiation) {
       FunctionDecl *ExceptionSpecTemplate = Tmpl;
       if (EPI.ExceptionSpec.Type == EST_Uninstantiated)
         ExceptionSpecTemplate = EPI.ExceptionSpec.SourceTemplate;
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index d72f259..8d76f69 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -2567,19 +2567,19 @@ namespace {
 IdentifierInfo *Sema::getNullabilityKeyword(NullabilityKind nullability) {
   switch (nullability) {
   case NullabilityKind::NonNull:
-    if (!Ident___nonnull)
-      Ident___nonnull = PP.getIdentifierInfo("__nonnull");
-    return Ident___nonnull;
+    if (!Ident__Nonnull)
+      Ident__Nonnull = PP.getIdentifierInfo("_Nonnull");
+    return Ident__Nonnull;
 
   case NullabilityKind::Nullable:
-    if (!Ident___nullable)
-      Ident___nullable = PP.getIdentifierInfo("__nullable");
-    return Ident___nullable;
+    if (!Ident__Nullable)
+      Ident__Nullable = PP.getIdentifierInfo("_Nullable");
+    return Ident__Nullable;
 
   case NullabilityKind::Unspecified:
-    if (!Ident___null_unspecified)
-      Ident___null_unspecified = PP.getIdentifierInfo("__null_unspecified");
-    return Ident___null_unspecified;
+    if (!Ident__Null_unspecified)
+      Ident__Null_unspecified = PP.getIdentifierInfo("_Null_unspecified");
+    return Ident__Null_unspecified;
   }
   llvm_unreachable("Unknown nullability kind.");
 }
@@ -2900,7 +2900,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
     }
   }
 
-  // Determine whether we should infer __nonnull on pointer types.
+  // Determine whether we should infer _Nonnull on pointer types.
   Optional<NullabilityKind> inferNullability;
   bool inferNullabilityCS = false;
   bool inferNullabilityInnerOnly = false;
@@ -3003,7 +3003,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
         break;
 
       case PointerDeclaratorKind::SingleLevelPointer:
-        // Infer __nonnull if we are in an assumes-nonnull region.
+        // Infer _Nonnull if we are in an assumes-nonnull region.
         if (inAssumeNonNullRegion) {
           inferNullability = NullabilityKind::NonNull;
           inferNullabilityCS = (context == Declarator::ObjCParameterContext ||
@@ -3013,7 +3013,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
 
       case PointerDeclaratorKind::CFErrorRefPointer:
       case PointerDeclaratorKind::NSErrorPointerPointer:
-        // Within a function or method signature, infer __nullable at both
+        // Within a function or method signature, infer _Nullable at both
         // levels.
         if (isFunctionOrMethod && inAssumeNonNullRegion)
           inferNullability = NullabilityKind::Nullable;
@@ -3023,7 +3023,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
         if (isFunctionOrMethod) {
           // On pointer-to-pointer parameters marked cf_returns_retained or
           // cf_returns_not_retained, if the outer pointer is explicit then
-          // infer the inner pointer as __nullable.
+          // infer the inner pointer as _Nullable.
           auto hasCFReturnsAttr = [](const AttributeList *NextAttr) -> bool {
             while (NextAttr) {
               if (NextAttr->getKind() == AttributeList::AT_CFReturnsRetained ||
@@ -3070,7 +3070,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
   }
 
   // Local function that checks the nullability for a given pointer declarator.
-  // Returns true if __nonnull was inferred.
+  // Returns true if _Nonnull was inferred.
   auto inferPointerNullability = [&](SimplePointerKind pointerKind,
                                      SourceLocation pointerLoc,
                                      AttributeList *&attrs) -> AttributeList * {
@@ -5084,8 +5084,7 @@ bool Sema::checkNullabilityTypeSpecifier(QualType &type,
       // Duplicated nullability.
       if (nullability == *existingNullability) {
         Diag(nullabilityLoc, diag::warn_nullability_duplicate)
-          << static_cast<unsigned>(nullability)
-          << isContextSensitive
+          << DiagNullabilityKind(nullability, isContextSensitive)
           << FixItHint::CreateRemoval(nullabilityLoc);
 
         break;
@@ -5093,10 +5092,8 @@ bool Sema::checkNullabilityTypeSpecifier(QualType &type,
 
       // Conflicting nullability.
       Diag(nullabilityLoc, diag::err_nullability_conflicting)
-        << static_cast<unsigned>(nullability) 
-        << isContextSensitive
-        << static_cast<unsigned>(*existingNullability)
-        << false;
+        << DiagNullabilityKind(nullability, isContextSensitive)
+        << DiagNullabilityKind(*existingNullability, false);
       return true;
     }
 
@@ -5110,10 +5107,8 @@ bool Sema::checkNullabilityTypeSpecifier(QualType &type,
   if (auto existingNullability = desugared->getNullability(Context)) {
     if (nullability != *existingNullability) {
       Diag(nullabilityLoc, diag::err_nullability_conflicting)
-        << static_cast<unsigned>(nullability)
-        << isContextSensitive
-        << static_cast<unsigned>(*existingNullability)
-        << false;
+        << DiagNullabilityKind(nullability, isContextSensitive)
+        << DiagNullabilityKind(*existingNullability, false);
 
       // Try to find the typedef with the existing nullability specifier.
       if (auto typedefType = desugared->getAs<TypedefType>()) {
@@ -5123,7 +5118,7 @@ bool Sema::checkNullabilityTypeSpecifier(QualType &type,
               = AttributedType::stripOuterNullability(underlyingType)) {
           if (*typedefNullability == *existingNullability) {
             Diag(typedefDecl->getLocation(), diag::note_nullability_here)
-              << static_cast<unsigned>(*existingNullability);
+              << DiagNullabilityKind(*existingNullability, false);
           }
         }
       }
@@ -5135,7 +5130,7 @@ bool Sema::checkNullabilityTypeSpecifier(QualType &type,
   // If this definitely isn't a pointer type, reject the specifier.
   if (!desugared->canHaveNullability()) {
     Diag(nullabilityLoc, diag::err_nullability_nonpointer)
-      << static_cast<unsigned>(nullability) << isContextSensitive << type;
+      << DiagNullabilityKind(nullability, isContextSensitive) << type;
     return true;
   }
   
@@ -5148,10 +5143,10 @@ bool Sema::checkNullabilityTypeSpecifier(QualType &type,
         pointeeType->isObjCObjectPointerType() ||
         pointeeType->isMemberPointerType()) {
       Diag(nullabilityLoc, diag::err_nullability_cs_multilevel)
-        << static_cast<unsigned>(nullability)
+        << DiagNullabilityKind(nullability, true)
         << type;
       Diag(nullabilityLoc, diag::note_nullability_type_specifier)
-        << static_cast<unsigned>(nullability)
+        << DiagNullabilityKind(nullability, false)
         << type
         << FixItHint::CreateReplacement(nullabilityLoc,
                                         getNullabilitySpelling(nullability));
@@ -5202,16 +5197,24 @@ static bool distributeNullabilityTypeAttr(TypeProcessingState &state,
 
     // Complain about the nullability qualifier being in the wrong
     // place.
-    unsigned pointerKind
-      = chunk.Kind == DeclaratorChunk::Pointer ? (inFunction ? 3 : 0)
-        : chunk.Kind == DeclaratorChunk::BlockPointer ? 1
-        : inFunction? 4 : 2;
+    enum {
+      PK_Pointer,
+      PK_BlockPointer,
+      PK_MemberPointer,
+      PK_FunctionPointer,
+      PK_MemberFunctionPointer,
+    } pointerKind
+      = chunk.Kind == DeclaratorChunk::Pointer ? (inFunction ? PK_FunctionPointer
+                                                             : PK_Pointer)
+        : chunk.Kind == DeclaratorChunk::BlockPointer ? PK_BlockPointer
+        : inFunction? PK_MemberFunctionPointer : PK_MemberPointer;
 
     auto diag = state.getSema().Diag(attr.getLoc(),
                                      diag::warn_nullability_declspec)
-      << static_cast<unsigned>(mapNullabilityAttrKind(attr.getKind()))
+      << DiagNullabilityKind(mapNullabilityAttrKind(attr.getKind()),
+                             attr.isContextSensitiveKeywordAttribute())
       << type
-      << pointerKind;
+      << static_cast<unsigned>(pointerKind);
 
     // FIXME: MemberPointer chunks don't carry the location of the *.
     if (chunk.Kind != DeclaratorChunk::MemberPointer) {
diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h
index 73dde7c..80896be 100644
--- a/lib/Sema/TreeTransform.h
+++ b/lib/Sema/TreeTransform.h
@@ -1321,11 +1321,12 @@ public:
   /// Subclasses may override this routine to provide different behavior.
   StmtResult RebuildOMPExecutableDirective(OpenMPDirectiveKind Kind,
                                            DeclarationNameInfo DirName,
+                                           OpenMPDirectiveKind CancelRegion,
                                            ArrayRef<OMPClause *> Clauses,
                                            Stmt *AStmt, SourceLocation StartLoc,
                                            SourceLocation EndLoc) {
-    return getSema().ActOnOpenMPExecutableDirective(Kind, DirName, Clauses,
-                                                    AStmt, StartLoc, EndLoc);
+    return getSema().ActOnOpenMPExecutableDirective(
+        Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc);
   }
 
   /// \brief Build a new OpenMP 'if' clause.
@@ -1551,6 +1552,19 @@ public:
                                             EndLoc);
   }
 
+  /// \brief Build a new OpenMP 'depend' pseudo clause.
+  ///
+  /// By default, performs semantic analysis to build the new OpenMP clause.
+  /// Subclasses may override this routine to provide different behavior.
+  OMPClause *
+  RebuildOMPDependClause(OpenMPDependClauseKind DepKind, SourceLocation DepLoc,
+                         SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
+                         SourceLocation StartLoc, SourceLocation LParenLoc,
+                         SourceLocation EndLoc) {
+    return getSema().ActOnOpenMPDependClause(DepKind, DepLoc, ColonLoc, VarList,
+                                             StartLoc, LParenLoc, EndLoc);
+  }
+
   /// \brief Rebuild the operand to an Objective-C \@synchronized statement.
   ///
   /// By default, performs semantic analysis to build the new statement.
@@ -5392,7 +5406,7 @@ QualType TreeTransform<Derived>::TransformAttributedType(
     if (auto nullability = oldType->getImmediateNullability()) {
       if (!modifiedType->canHaveNullability()) {
         SemaRef.Diag(TL.getAttrNameLoc(), diag::err_nullability_nonpointer)
-          << static_cast<unsigned>(*nullability) << false << modifiedType;
+          << DiagNullabilityKind(*nullability, false) << modifiedType;
         return QualType();
       }
     }
@@ -6664,7 +6678,9 @@ StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
   for (ArrayRef<OMPClause *>::iterator I = Clauses.begin(), E = Clauses.end();
        I != E; ++I) {
     if (*I) {
+      getDerived().getSema().StartOpenMPClause((*I)->getClauseKind());
       OMPClause *Clause = getDerived().TransformOMPClause(*I);
+      getDerived().getSema().EndOpenMPClause();
       if (Clause)
         TClauses.push_back(Clause);
     } else {
@@ -6700,10 +6716,16 @@ StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
     DirName = cast<OMPCriticalDirective>(D)->getDirectiveName();
     DirName = getDerived().TransformDeclarationNameInfo(DirName);
   }
+  OpenMPDirectiveKind CancelRegion = OMPD_unknown;
+  if (D->getDirectiveKind() == OMPD_cancellation_point) {
+    CancelRegion = cast<OMPCancellationPointDirective>(D)->getCancelRegion();
+  } else if (D->getDirectiveKind() == OMPD_cancel) {
+    CancelRegion = cast<OMPCancelDirective>(D)->getCancelRegion();
+  }
 
   return getDerived().RebuildOMPExecutableDirective(
-      D->getDirectiveKind(), DirName, TClauses, AssociatedStmt.get(),
-      D->getLocStart(), D->getLocEnd());
+      D->getDirectiveKind(), DirName, CancelRegion, TClauses,
+      AssociatedStmt.get(), D->getLocStart(), D->getLocEnd());
 }
 
 template <typename Derived>
@@ -6947,6 +6969,28 @@ TreeTransform<Derived>::TransformOMPTeamsDirective(OMPTeamsDirective *D) {
   return Res;
 }
 
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPCancellationPointDirective(
+    OMPCancellationPointDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().StartOpenMPDSABlock(OMPD_cancellation_point, DirName,
+                                             nullptr, D->getLocStart());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPCancelDirective(OMPCancelDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().StartOpenMPDSABlock(OMPD_cancel, DirName, nullptr,
+                                             D->getLocStart());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
 //===----------------------------------------------------------------------===//
 // OpenMP clause transformation
 //===----------------------------------------------------------------------===//
@@ -7253,6 +7297,22 @@ OMPClause *TreeTransform<Derived>::TransformOMPFlushClause(OMPFlushClause *C) {
                                             C->getLParenLoc(), C->getLocEnd());
 }
 
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPDependClause(OMPDependClause *C) {
+  llvm::SmallVector<Expr *, 16> Vars;
+  Vars.reserve(C->varlist_size());
+  for (auto *VE : C->varlists()) {
+    ExprResult EVar = getDerived().TransformExpr(cast<Expr>(VE));
+    if (EVar.isInvalid())
+      return nullptr;
+    Vars.push_back(EVar.get());
+  }
+  return getDerived().RebuildOMPDependClause(
+      C->getDependencyKind(), C->getDependencyLoc(), C->getColonLoc(), Vars,
+      C->getLocStart(), C->getLParenLoc(), C->getLocEnd());
+}
+
 //===----------------------------------------------------------------------===//
 // Expression transformation
 //===----------------------------------------------------------------------===//
@@ -9368,7 +9428,8 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
     }
 
     // Capture the transformed variable.
-    getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind);
+    getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind,
+                                 EllipsisLoc);
   }
   if (!FinishedExplicitCaptures)
     getSema().finishLambdaExplicitCaptures(LSI);
diff --git a/lib/Serialization/ASTCommon.h b/lib/Serialization/ASTCommon.h
index 79d1817..f21e8a7 100644
--- a/lib/Serialization/ASTCommon.h
+++ b/lib/Serialization/ASTCommon.h
@@ -36,7 +36,8 @@ enum DeclUpdateKind {
   UPD_MANGLING_NUMBER,
   UPD_STATIC_LOCAL_NUMBER,
   UPD_DECL_MARKED_OPENMP_THREADPRIVATE,
-  UPD_DECL_EXPORTED
+  UPD_DECL_EXPORTED,
+  UPD_ADDED_ATTR_TO_RECORD
 };
 
 TypeIdx TypeIdxFromBuiltin(const BuiltinType *BT);
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index d75b5eb..48a898c 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -209,6 +209,12 @@ static bool checkLanguageOptions(const LangOptions &LangOpts,
 #define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
 #include "clang/Basic/LangOptions.def"
 
+  if (ExistingLangOpts.ModuleFeatures != LangOpts.ModuleFeatures) {
+    if (Diags)
+      Diags->Report(diag::err_pch_langopt_value_mismatch) << "module features";
+    return true;
+  }
+
   if (ExistingLangOpts.ObjCRuntime != LangOpts.ObjCRuntime) {
     if (Diags)
       Diags->Report(diag::err_pch_langopt_value_mismatch)
@@ -2307,7 +2313,8 @@ ASTReader::ReadControlBlock(ModuleFile &F,
     case INPUT_FILE_OFFSETS:
       NumInputs = Record[0];
       NumUserInputs = Record[1];
-      F.InputFileOffsets = (const uint64_t *)Blob.data();
+      F.InputFileOffsets =
+          (const llvm::support::unaligned_uint64_t *)Blob.data();
       F.InputFilesLoaded.resize(NumInputs);
       break;
     }
@@ -4262,6 +4269,7 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
         CurrentModule->setASTFile(F.File);
       }
 
+      CurrentModule->Signature = F.Signature;
       CurrentModule->IsFromModuleFile = true;
       CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem;
       CurrentModule->IsExternC = IsExternC;
@@ -4436,15 +4444,14 @@ bool ASTReader::ParseLanguageOptions(const RecordData &Record,
   LangOpts.Sanitize.set(SanitizerKind::ID, Record[Idx++]);
 #include "clang/Basic/Sanitizers.def"
 
+  for (unsigned N = Record[Idx++]; N; --N)
+    LangOpts.ModuleFeatures.push_back(ReadString(Record, Idx));
+
   ObjCRuntime::Kind runtimeKind = (ObjCRuntime::Kind) Record[Idx++];
   VersionTuple runtimeVersion = ReadVersionTuple(Record, Idx);
   LangOpts.ObjCRuntime = ObjCRuntime(runtimeKind, runtimeVersion);
-  
-  unsigned Length = Record[Idx++];
-  LangOpts.CurrentModule.assign(Record.begin() + Idx, 
-                                Record.begin() + Idx + Length);
 
-  Idx += Length;
+  LangOpts.CurrentModule = ReadString(Record, Idx);
 
   // Comment options.
   for (unsigned N = Record[Idx++]; N; --N) {
@@ -7360,6 +7367,37 @@ Module *ASTReader::getModule(unsigned ID) {
   return getSubmodule(ID);
 }
 
+ExternalASTSource::ASTSourceDescriptor
+ASTReader::getSourceDescriptor(const Module &M) {
+  StringRef Dir, Filename;
+  if (M.Directory)
+    Dir = M.Directory->getName();
+  if (auto *File = M.getASTFile())
+    Filename = File->getName();
+  return ASTReader::ASTSourceDescriptor{
+             M.getFullModuleName(), Dir, Filename,
+             M.Signature
+         };
+}
+
+llvm::Optional<ExternalASTSource::ASTSourceDescriptor>
+ASTReader::getSourceDescriptor(unsigned ID) {
+  if (const Module *M = getSubmodule(ID))
+    return getSourceDescriptor(*M);
+
+  // If there is only a single PCH, return it instead.
+  // Chained PCH are not suported.
+  if (ModuleMgr.size() == 1) {
+    ModuleFile &MF = ModuleMgr.getPrimaryModule();
+    return ASTReader::ASTSourceDescriptor{
+      MF.OriginalSourceFileName, MF.OriginalDir,
+      MF.FileName,
+      MF.Signature
+    };
+  }
+  return None;
+}
+
 Selector ASTReader::getLocalSelector(ModuleFile &M, unsigned LocalID) {
   return DecodeSelector(getGlobalSelectorID(M, LocalID));
 }
diff --git a/lib/Serialization/ASTReaderDecl.cpp b/lib/Serialization/ASTReaderDecl.cpp
index 00ebd3e..b23c33c 100644
--- a/lib/Serialization/ASTReaderDecl.cpp
+++ b/lib/Serialization/ASTReaderDecl.cpp
@@ -810,14 +810,14 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
       FunctionTemplateSpecializationInfo::Profile(ID, TemplArgs, C);
       void *InsertPos = nullptr;
       FunctionTemplateDecl::Common *CommonPtr = CanonTemplate->getCommonPtr();
-      CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos);
+      FunctionTemplateSpecializationInfo *ExistingInfo =
+          CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos);
       if (InsertPos)
         CommonPtr->Specializations.InsertNode(FTInfo, InsertPos);
       else {
         assert(Reader.getContext().getLangOpts().Modules &&
                "already deserialized this template specialization");
-        // FIXME: This specialization is a redeclaration of one from another
-        // module. Merge it.
+        mergeRedeclarable(FD, ExistingInfo->Function, Redecl);
       }
     }
     break;
@@ -839,8 +839,8 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
     
     FD->setDependentTemplateSpecialization(Reader.getContext(),
                                            TemplDecls, TemplArgs);
-
-    // FIXME: Merging.
+    // These are not merged; we don't need to merge redeclarations of dependent
+    // template friends.
     break;
   }
   }
@@ -1045,12 +1045,10 @@ void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
   QualType T = Reader.readType(F, Record, Idx);
   TypeSourceInfo *TSI = GetTypeSourceInfo(Record, Idx);
   D->setType(T, TSI);
-  // FIXME: stable encoding
   D->setPropertyAttributes(
                       (ObjCPropertyDecl::PropertyAttributeKind)Record[Idx++]);
   D->setPropertyAttributesAsWritten(
                       (ObjCPropertyDecl::PropertyAttributeKind)Record[Idx++]);
-  // FIXME: stable encoding
   D->setPropertyImplementation(
                             (ObjCPropertyDecl::PropertyControl)Record[Idx++]);
   D->setGetterName(Reader.ReadDeclarationName(F,Record, Idx).getObjCSelector());
@@ -1768,9 +1766,6 @@ DeclID ASTDeclReader::VisitTemplateDecl(TemplateDecl *D) {
       = Reader.ReadTemplateParameterList(F, Record, Idx); 
   D->init(TemplatedDecl, TemplateParams);
 
-  // FIXME: If this is a redeclaration of a template from another module, handle
-  // inheritance of default template arguments.
-
   return PatternID;
 }
 
@@ -2622,7 +2617,6 @@ static bool isSameEntity(NamedDecl *X, NamedDecl *Y) {
     return NAX->getNamespace()->Equals(NAY->getNamespace());
   }
 
-  // FIXME: Many other cases to implement.
   return false;
 }
 
@@ -2772,9 +2766,6 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
     return Result;
   }
 
-  // FIXME: Bail out for non-canonical declarations. We will have performed any
-  // necessary merging already.
-
   DeclContext *DC = D->getDeclContext()->getRedeclContext();
   if (TypedefNameForLinkage) {
     auto It = Reader.ImportedTypedefNamesForLinkage.find(
@@ -3888,7 +3879,7 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
           Reader.Context, ReadSourceRange(Record, Idx)));
       break;
 
-    case UPD_DECL_EXPORTED:
+    case UPD_DECL_EXPORTED: {
       unsigned SubmoduleID = readSubmoduleID(Record, Idx);
       auto *Exported = cast<NamedDecl>(D);
       if (auto *TD = dyn_cast<TagDecl>(Exported))
@@ -3897,18 +3888,28 @@ void ASTDeclReader::UpdateDecl(Decl *D, ModuleFile &ModuleFile,
       if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
         // FIXME: This doesn't send the right notifications if there are
         // ASTMutationListeners other than an ASTWriter.
-        Reader.getContext().mergeDefinitionIntoModule(cast<NamedDecl>(D), Owner,
-                                                      /*NotifyListeners*/false);
-        Reader.PendingMergedDefinitionsToDeduplicate.insert(cast<NamedDecl>(D));
+        Reader.getContext().mergeDefinitionIntoModule(
+            cast<NamedDecl>(Exported), Owner,
+            /*NotifyListeners*/ false);
+        Reader.PendingMergedDefinitionsToDeduplicate.insert(
+            cast<NamedDecl>(Exported));
       } else if (Owner && Owner->NameVisibility != Module::AllVisible) {
         // If Owner is made visible at some later point, make this declaration
         // visible too.
-        Reader.HiddenNamesMap[Owner].push_back(D);
+        Reader.HiddenNamesMap[Owner].push_back(Exported);
       } else {
         // The declaration is now visible.
-        D->Hidden = false;
+        Exported->Hidden = false;
       }
       break;
     }
+
+    case UPD_ADDED_ATTR_TO_RECORD:
+      AttrVec Attrs;
+      Reader.ReadAttributes(F, Attrs, Record, Idx);
+      assert(Attrs.size() == 1);
+      D->addAttr(Attrs[0]);
+      break;
+    }
   }
 }
diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp
index c15f6b0..ca5d0be 100644
--- a/lib/Serialization/ASTReaderStmt.cpp
+++ b/lib/Serialization/ASTReaderStmt.cpp
@@ -1785,6 +1785,9 @@ OMPClause *OMPClauseReader::readClause() {
   case OMPC_flush:
     C = OMPFlushClause::CreateEmpty(Context, Record[Idx++]);
     break;
+  case OMPC_depend:
+    C = OMPDependClause::CreateEmpty(Context, Record[Idx++]);
+    break;
   }
   Visit(C);
   C->setLocStart(Reader->ReadSourceLocation(Record, Idx));
@@ -2049,6 +2052,19 @@ void OMPClauseReader::VisitOMPFlushClause(OMPFlushClause *C) {
   C->setVarRefs(Vars);
 }
 
+void OMPClauseReader::VisitOMPDependClause(OMPDependClause *C) {
+  C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
+  C->setDependencyKind(static_cast<OpenMPDependClauseKind>(Record[Idx++]));
+  C->setDependencyLoc(Reader->ReadSourceLocation(Record, Idx));
+  C->setColonLoc(Reader->ReadSourceLocation(Record, Idx));
+  unsigned NumVars = C->varlist_size();
+  SmallVector<Expr *, 16> Vars;
+  Vars.reserve(NumVars);
+  for (unsigned i = 0; i != NumVars; ++i)
+    Vars.push_back(Reader->Reader.ReadSubExpr());
+  C->setVarRefs(Vars);
+}
+
 //===----------------------------------------------------------------------===//
 // OpenMP Directives.
 //===----------------------------------------------------------------------===//
@@ -2233,6 +2249,19 @@ void ASTStmtReader::VisitOMPTeamsDirective(OMPTeamsDirective *D) {
   VisitOMPExecutableDirective(D);
 }
 
+void ASTStmtReader::VisitOMPCancellationPointDirective(
+    OMPCancellationPointDirective *D) {
+  VisitStmt(D);
+  VisitOMPExecutableDirective(D);
+  D->setCancelRegion(static_cast<OpenMPDirectiveKind>(Record[Idx++]));
+}
+
+void ASTStmtReader::VisitOMPCancelDirective(OMPCancelDirective *D) {
+  VisitStmt(D);
+  VisitOMPExecutableDirective(D);
+  D->setCancelRegion(static_cast<OpenMPDirectiveKind>(Record[Idx++]));
+}
+
 //===----------------------------------------------------------------------===//
 // ASTReader Implementation
 //===----------------------------------------------------------------------===//
@@ -2836,6 +2865,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
           Context, Record[ASTStmtReader::NumStmtFields], Empty);
       break;
 
+    case STMT_OMP_CANCELLATION_POINT_DIRECTIVE:
+      S = OMPCancellationPointDirective::CreateEmpty(Context, Empty);
+      break;
+
+    case STMT_OMP_CANCEL_DIRECTIVE:
+      S = OMPCancelDirective::CreateEmpty(Context, Empty);
+      break;
+
     case EXPR_CXX_OPERATOR_CALL:
       S = new (Context) CXXOperatorCallExpr(Context, Empty);
       break;
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index 5bb0bec..0d15b17 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -1266,11 +1266,14 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
   Record.push_back(LangOpts.Sanitize.has(SanitizerKind::ID));
 #include "clang/Basic/Sanitizers.def"
 
+  Record.push_back(LangOpts.ModuleFeatures.size());
+  for (StringRef Feature : LangOpts.ModuleFeatures)
+    AddString(Feature, Record);
+
   Record.push_back((unsigned) LangOpts.ObjCRuntime.getKind());
   AddVersionTuple(LangOpts.ObjCRuntime.getVersion(), Record);
-  
-  Record.push_back(LangOpts.CurrentModule.size());
-  Record.append(LangOpts.CurrentModule.begin(), LangOpts.CurrentModule.end());
+
+  AddString(LangOpts.CurrentModule, Record);
 
   // Comment options.
   Record.push_back(LangOpts.CommentOpts.BlockCommandNames.size());
@@ -4618,6 +4621,10 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
       case UPD_DECL_EXPORTED:
         Record.push_back(getSubmoduleID(Update.getModule()));
         break;
+
+      case UPD_ADDED_ATTR_TO_RECORD:
+        WriteAttributes(llvm::makeArrayRef(Update.getAttr()), Record);
+        break;
       }
     }
 
@@ -5766,3 +5773,11 @@ void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {
   assert(D->isHidden() && "expected a hidden declaration");
   DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, M));
 }
+
+void ASTWriter::AddedAttributeToRecord(const Attr *Attr,
+                                       const RecordDecl *Record) {
+  assert(!WritingAST && "Already writing the AST!");
+  if (!Record->isFromASTFile())
+    return;
+  DeclUpdates[Record].push_back(DeclUpdate(UPD_ADDED_ATTR_TO_RECORD, Attr));
+}
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index a461d3f..cddefac 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -1906,6 +1906,16 @@ void OMPClauseWriter::VisitOMPFlushClause(OMPFlushClause *C) {
     Writer->Writer.AddStmt(VE);
 }
 
+void OMPClauseWriter::VisitOMPDependClause(OMPDependClause *C) {
+  Record.push_back(C->varlist_size());
+  Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
+  Record.push_back(C->getDependencyKind());
+  Writer->Writer.AddSourceLocation(C->getDependencyLoc(), Record);
+  Writer->Writer.AddSourceLocation(C->getColonLoc(), Record);
+  for (auto *VE : C->varlists())
+    Writer->Writer.AddStmt(VE);
+}
+
 //===----------------------------------------------------------------------===//
 // OpenMP Directives.
 //===----------------------------------------------------------------------===//
@@ -2097,6 +2107,21 @@ void ASTStmtWriter::VisitOMPTeamsDirective(OMPTeamsDirective *D) {
   Code = serialization::STMT_OMP_TEAMS_DIRECTIVE;
 }
 
+void ASTStmtWriter::VisitOMPCancellationPointDirective(
+    OMPCancellationPointDirective *D) {
+  VisitStmt(D);
+  VisitOMPExecutableDirective(D);
+  Record.push_back(D->getCancelRegion());
+  Code = serialization::STMT_OMP_CANCELLATION_POINT_DIRECTIVE;
+}
+
+void ASTStmtWriter::VisitOMPCancelDirective(OMPCancelDirective *D) {
+  VisitStmt(D);
+  VisitOMPExecutableDirective(D);
+  Record.push_back(D->getCancelRegion());
+  Code = serialization::STMT_OMP_CANCEL_DIRECTIVE;
+}
+
 //===----------------------------------------------------------------------===//
 // ASTWriter Implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/Serialization/GeneratePCH.cpp b/lib/Serialization/GeneratePCH.cpp
index c50b3f9..c461fe9 100644
--- a/lib/Serialization/GeneratePCH.cpp
+++ b/lib/Serialization/GeneratePCH.cpp
@@ -19,7 +19,6 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/SemaConsumer.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
-#include "llvm/Support/raw_ostream.h"
 #include <string>
 
 using namespace clang;
diff --git a/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index cb5b010..557439b 100644
--- a/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -76,11 +76,10 @@ void ArrayBoundChecker::checkLocation(SVal l, bool isLoad, const Stmt* LoadS,
     // reference is outside the range.
 
     // Generate a report for this bug.
-    BugReport *report = 
-      new BugReport(*BT, BT->getDescription(), N);
+    auto report = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
 
     report->addRange(LoadS->getSourceRange());
-    C.emitReport(report);
+    C.emitReport(std::move(report));
     return;
   }
   
diff --git a/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index e462e2b..d8dc2aa 100644
--- a/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -207,7 +207,8 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
     break;
   }
 
-  checkerContext.emitReport(new BugReport(*BT, os.str(), errorNode));
+  checkerContext.emitReport(
+      llvm::make_unique<BugReport>(*BT, os.str(), errorNode));
 }
 
 void RegionRawOffsetV2::dump() const {
diff --git a/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index 3fd5576..f763284 100644
--- a/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -207,10 +207,10 @@ void NilArgChecker::generateBugReport(ExplodedNode *N,
   if (!BT)
     BT.reset(new APIMisuse(this, "nil argument"));
 
-  BugReport *R = new BugReport(*BT, Msg, N);
+  auto R = llvm::make_unique<BugReport>(*BT, Msg, N);
   R->addRange(Range);
   bugreporter::trackNullOrUndefValue(N, E, *R);
-  C.emitReport(R);
+  C.emitReport(std::move(R));
 }
 
 void NilArgChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
@@ -516,9 +516,9 @@ void CFNumberCreateChecker::checkPreStmt(const CallExpr *CE,
     if (!BT)
       BT.reset(new APIMisuse(this, "Bad use of CFNumberCreate"));
 
-    BugReport *report = new BugReport(*BT, os.str(), N);
+    auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
     report->addRange(CE->getArg(2)->getSourceRange());
-    C.emitReport(report);
+    C.emitReport(std::move(report));
   }
 }
 
@@ -605,10 +605,10 @@ void CFRetainReleaseChecker::checkPreStmt(const CallExpr *CE,
     else
       llvm_unreachable("impossible case");
 
-    BugReport *report = new BugReport(*BT, description, N);
+    auto report = llvm::make_unique<BugReport>(*BT, description, N);
     report->addRange(Arg->getSourceRange());
     bugreporter::trackNullOrUndefValue(N, Arg, *report);
-    C.emitReport(report);
+    C.emitReport(std::move(report));
     return;
   }
 
@@ -666,9 +666,9 @@ void ClassReleaseChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
           "of class '" << Class->getName()
        << "' and not the class directly";
   
-    BugReport *report = new BugReport(*BT, os.str(), N);
+    auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
     report->addRange(msg.getSourceRange());
-    C.emitReport(report);
+    C.emitReport(std::move(report));
   }
 }
 
@@ -819,9 +819,9 @@ void VariadicMethodTypeChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
     ArgTy.print(os, C.getLangOpts());
     os << "'";
 
-    BugReport *R = new BugReport(*BT, os.str(), errorNode.getValue());
+    auto R = llvm::make_unique<BugReport>(*BT, os.str(), errorNode.getValue());
     R->addRange(msg.getArgSourceRange(I));
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp b/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
index 83a37c9..e945c38 100644
--- a/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/BoolAssignmentChecker.cpp
@@ -35,7 +35,7 @@ void BoolAssignmentChecker::emitReport(ProgramStateRef state,
   if (ExplodedNode *N = C.addTransition(state)) {
     if (!BT)
       BT.reset(new BuiltinBug(this, "Assignment of a non-Boolean value"));
-    C.emitReport(new BugReport(*BT, BT->getDescription(), N));
+    C.emitReport(llvm::make_unique<BugReport>(*BT, BT->getDescription(), N));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 0f5741b..54b1241 100644
--- a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -245,11 +245,11 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
 
     // Generate a report for this bug.
     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
-    BugReport *report = new BugReport(*BT, os.str(), N);
+    auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
 
     report->addRange(S->getSourceRange());
     bugreporter::trackNullOrUndefValue(N, S, *report);
-    C.emitReport(report);
+    C.emitReport(std::move(report));
     return nullptr;
   }
 
@@ -304,9 +304,9 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
 
     // Generate a report for this bug.
-    BugReport *report;
+    std::unique_ptr<BugReport> report;
     if (warningMsg) {
-      report = new BugReport(*BT, warningMsg, N);
+      report = llvm::make_unique<BugReport>(*BT, warningMsg, N);
     } else {
       assert(CurrentFunctionDescription);
       assert(CurrentFunctionDescription[0] != '\0');
@@ -316,7 +316,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
       os << toUppercase(CurrentFunctionDescription[0])
          << &CurrentFunctionDescription[1]
          << " accesses out-of-bound array element";
-      report = new BugReport(*BT, os.str(), N);      
+      report = llvm::make_unique<BugReport>(*BT, os.str(), N);
     }
 
     // FIXME: It would be nice to eventually make this diagnostic more clear,
@@ -324,7 +324,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
     // reference is outside the range.
 
     report->addRange(S->getSourceRange());
-    C.emitReport(report);
+    C.emitReport(std::move(report));
     return nullptr;
   }
   
@@ -534,13 +534,12 @@ void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
                                  categories::UnixAPI, "Improper arguments"));
 
   // Generate a report for this bug.
-  BugReport *report = 
-    new BugReport(*BT_Overlap,
-      "Arguments must not be overlapping buffers", N);
+  auto report = llvm::make_unique<BugReport>(
+      *BT_Overlap, "Arguments must not be overlapping buffers", N);
   report->addRange(First->getSourceRange());
   report->addRange(Second->getSourceRange());
 
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 }
 
 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
@@ -603,8 +602,8 @@ ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
         "be represented as a size_t";
 
       // Generate a report for this bug.
-      BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
-      C.emitReport(report);        
+      C.emitReport(
+          llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N));
 
       return nullptr;
     }
@@ -721,10 +720,10 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
            << "', which is not a null-terminated string";
 
         // Generate a report for this bug.
-        BugReport *report = new BugReport(*BT_NotCString, os.str(), N);
+        auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
 
         report->addRange(Ex->getSourceRange());
-        C.emitReport(report);        
+        C.emitReport(std::move(report));        
       }
       return UndefinedVal();
 
@@ -785,11 +784,10 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
         os << "not a null-terminated string";
 
       // Generate a report for this bug.
-      BugReport *report = new BugReport(*BT_NotCString,
-                                                        os.str(), N);
+      auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N);
 
       report->addRange(Ex->getSourceRange());
-      C.emitReport(report);        
+      C.emitReport(std::move(report));        
     }
 
     return UndefinedVal();
diff --git a/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp
index abfb971d..3db1994 100644
--- a/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp
@@ -168,10 +168,9 @@ void WalkAST::VisitCallExpr(CallExpr *CE) {
 }
 
 void WalkAST::VisitChildren(Stmt *S) {
-  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I != E;
-      ++I)
-    if (Stmt *child = *I)
-      Visit(child);
+  for (Stmt *Child : S->children())
+    if (Child)
+      Visit(Child);
 }
 
 namespace {
diff --git a/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp b/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
index adb7a54..26423b7 100644
--- a/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CallAndMessageChecker.cpp
@@ -94,14 +94,14 @@ void CallAndMessageChecker::emitBadCall(BugType *BT, CheckerContext &C,
   if (!N)
     return;
 
-  BugReport *R = new BugReport(*BT, BT->getName(), N);
+  auto R = llvm::make_unique<BugReport>(*BT, BT->getName(), N);
   if (BadE) {
     R->addRange(BadE->getSourceRange());
     if (BadE->isGLValue())
       BadE = bugreporter::getDerefExpr(BadE);
     bugreporter::trackNullOrUndefValue(N, BadE, *R);
   }
-  C.emitReport(R);
+  C.emitReport(std::move(R));
 }
 
 static StringRef describeUninitializedArgumentInCall(const CallEvent &Call,
@@ -164,12 +164,12 @@ bool CallAndMessageChecker::uninitRefOrPointer(CheckerContext &C,
     if (PSV.isUndef()) {
       if (ExplodedNode *N = C.generateSink()) {
         LazyInit_BT(BD, BT);
-        BugReport *R = new BugReport(*BT, Message, N);
+        auto R = llvm::make_unique<BugReport>(*BT, Message, N);
         R->addRange(ArgRange);
         if (ArgEx) {
           bugreporter::trackNullOrUndefValue(N, ArgEx, *R);
         }
-        C.emitReport(R);
+        C.emitReport(std::move(R));
       }
       return true;
     }
@@ -199,11 +199,11 @@ bool CallAndMessageChecker::PreVisitProcessArg(CheckerContext &C,
       // Generate a report for this bug.
       StringRef Desc =
           describeUninitializedArgumentInCall(Call, IsFirstArgument);
-      BugReport *R = new BugReport(*BT, Desc, N);
+      auto R = llvm::make_unique<BugReport>(*BT, Desc, N);
       R->addRange(ArgRange);
       if (ArgEx)
         bugreporter::trackNullOrUndefValue(N, ArgEx, *R);
-      C.emitReport(R);
+      C.emitReport(std::move(R));
     }
     return true;
   }
@@ -281,12 +281,12 @@ bool CallAndMessageChecker::PreVisitProcessArg(CheckerContext &C,
         }
 
         // Generate a report for this bug.
-        BugReport *R = new BugReport(*BT, os.str(), N);
+        auto R = llvm::make_unique<BugReport>(*BT, os.str(), N);
         R->addRange(ArgRange);
 
         // FIXME: enhance track back for uninitialized value for arbitrary
         // memregions
-        C.emitReport(R);
+        C.emitReport(std::move(R));
       }
       return true;
     }
@@ -342,9 +342,9 @@ void CallAndMessageChecker::checkPreStmt(const CXXDeleteExpr *DE,
     else
       Desc = "Argument to 'delete' is uninitialized";
     BugType *BT = BT_cxx_delete_undef.get();
-    BugReport *R = new BugReport(*BT, Desc, N);
+    auto R = llvm::make_unique<BugReport>(*BT, Desc, N);
     bugreporter::trackNullOrUndefValue(N, DE, *R);
-    C.emitReport(R);
+    C.emitReport(std::move(R));
     return;
   }
 }
@@ -400,8 +400,8 @@ void CallAndMessageChecker::checkPreCall(const CallEvent &Call,
          << (Params == 1 ? "" : "s") << " is called with less ("
          << Call.getNumArgs() << ")";
 
-      BugReport *R = new BugReport(*BT_call_few_args, os.str(), N);
-      C.emitReport(R);
+      C.emitReport(
+          llvm::make_unique<BugReport>(*BT_call_few_args, os.str(), N));
     }
   }
 
@@ -461,14 +461,14 @@ void CallAndMessageChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
       }
       assert(BT && "Unknown message kind.");
 
-      BugReport *R = new BugReport(*BT, BT->getName(), N);
+      auto R = llvm::make_unique<BugReport>(*BT, BT->getName(), N);
       const ObjCMessageExpr *ME = msg.getOriginExpr();
       R->addRange(ME->getReceiverRange());
 
       // FIXME: getTrackNullOrUndefValueVisitor can't handle "super" yet.
       if (const Expr *ReceiverE = ME->getInstanceReceiver())
         bugreporter::trackNullOrUndefValue(N, ReceiverE, *R);
-      C.emitReport(R);
+      C.emitReport(std::move(R));
     }
     return;
   } else {
@@ -512,13 +512,13 @@ void CallAndMessageChecker::emitNilReceiverBug(CheckerContext &C,
     os << "' that will be garbage";
   }
 
-  BugReport *report = new BugReport(*BT_msg_ret, os.str(), N);
+  auto report = llvm::make_unique<BugReport>(*BT_msg_ret, os.str(), N);
   report->addRange(ME->getReceiverRange());
   // FIXME: This won't track "self" in messages to super.
   if (const Expr *receiver = ME->getInstanceReceiver()) {
     bugreporter::trackNullOrUndefValue(N, receiver, *report);
   }
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 }
 
 static bool supportsNilWithFloatRet(const llvm::Triple &triple) {
diff --git a/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp b/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp
index 3ba063d..0d683f9 100644
--- a/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CastSizeChecker.cpp
@@ -136,9 +136,9 @@ void CastSizeChecker::checkPreStmt(const CastExpr *CE,CheckerContext &C) const {
       BT.reset(new BuiltinBug(this, "Cast region with wrong size.",
                                     "Cast a region whose size is not a multiple"
                                     " of the destination type size."));
-    BugReport *R = new BugReport(*BT, BT->getDescription(), errorNode);
+    auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), errorNode);
     R->addRange(CE->getSourceRange());
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp b/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp
index d765315..ba3024d 100644
--- a/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CastToStructChecker.cpp
@@ -63,9 +63,9 @@ void CastToStructChecker::checkPreStmt(const CastExpr *CE,
                            "Casting a non-structure type to a structure type "
                            "and accessing a field can lead to memory access "
                            "errors or data corruption."));
-      BugReport *R = new BugReport(*BT,BT->getDescription(), N);
+      auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
       R->addRange(CE->getSourceRange());
-      C.emitReport(R);
+      C.emitReport(std::move(R));
     }
   }
 }
diff --git a/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
index 339af8f..12eb0bd 100644
--- a/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
+++ b/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp
@@ -69,8 +69,8 @@ static bool scan_ivar_release(Stmt *S, ObjCIvarDecl *ID,
             }
 
   // Recurse to children.
-  for (Stmt::child_iterator I = S->child_begin(), E= S->child_end(); I!=E; ++I)
-    if (*I && scan_ivar_release(*I, ID, PD, Release, SelfII, Ctx))
+  for (Stmt *SubStmt : S->children())
+    if (SubStmt && scan_ivar_release(SubStmt, ID, PD, Release, SelfII, Ctx))
       return true;
 
   return false;
diff --git a/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp b/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
index 0beb917..e0c113c 100644
--- a/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
+++ b/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
@@ -109,9 +109,9 @@ public:
 //===----------------------------------------------------------------------===//
 
 void WalkAST::VisitChildren(Stmt *S) {
-  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I)
-    if (Stmt *child = *I)
-      Visit(child);
+  for (Stmt *Child : S->children())
+    if (Child)
+      Visit(Child);
 }
 
 void WalkAST::VisitCallExpr(CallExpr *CE) {
@@ -162,11 +162,11 @@ void WalkAST::VisitCallExpr(CallExpr *CE) {
 }
 
 void WalkAST::VisitCompoundStmt(CompoundStmt *S) {
-  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I)
-    if (Stmt *child = *I) {
-      if (CallExpr *CE = dyn_cast<CallExpr>(child))
+  for (Stmt *Child : S->children())
+    if (Child) {
+      if (CallExpr *CE = dyn_cast<CallExpr>(Child))
         checkUncheckedReturnValue(CE);
-      Visit(child);
+      Visit(Child);
     }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp b/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp
index a61e658..81a2006 100644
--- a/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp
+++ b/lib/StaticAnalyzer/Checkers/CheckSizeofPointer.cpp
@@ -37,9 +37,9 @@ public:
 }
 
 void WalkAST::VisitChildren(Stmt *S) {
-  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I)
-    if (Stmt *child = *I)
-      Visit(child);
+  for (Stmt *Child : S->children())
+    if (Child)
+      Visit(Child);
 }
 
 // CWE-467: Use of sizeof() on a Pointer Type
diff --git a/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp b/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
index ad41577..804e83c 100644
--- a/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
@@ -145,11 +145,10 @@ void ChrootChecker::checkPreStmt(const CallExpr *CE, CheckerContext &C) const {
           BT_BreakJail.reset(new BuiltinBug(
               this, "Break out of jail", "No call of chdir(\"/\") immediately "
                                          "after chroot"));
-        BugReport *R = new BugReport(*BT_BreakJail, 
-                                     BT_BreakJail->getDescription(), N);
-        C.emitReport(R);
+        C.emitReport(llvm::make_unique<BugReport>(
+            *BT_BreakJail, BT_BreakJail->getDescription(), N));
       }
-  
+
   return;
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp b/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp
index 2e442c7..2ba7ea4 100644
--- a/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp
@@ -160,10 +160,8 @@ void DereferenceChecker::reportBug(ProgramStateRef State, const Stmt *S,
   }
 
   os.flush();
-  BugReport *report =
-    new BugReport(*BT_null,
-                  buf.empty() ? BT_null->getDescription() : StringRef(buf),
-                  N);
+  auto report = llvm::make_unique<BugReport>(
+      *BT_null, buf.empty() ? BT_null->getDescription() : StringRef(buf), N);
 
   bugreporter::trackNullOrUndefValue(N, bugreporter::getDerefExpr(S), *report);
 
@@ -171,7 +169,7 @@ void DereferenceChecker::reportBug(ProgramStateRef State, const Stmt *S,
        I = Ranges.begin(), E = Ranges.end(); I!=E; ++I)
     report->addRange(*I);
 
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 }
 
 void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
@@ -183,11 +181,11 @@ void DereferenceChecker::checkLocation(SVal l, bool isLoad, const Stmt* S,
         BT_undef.reset(
             new BuiltinBug(this, "Dereference of undefined pointer value"));
 
-      BugReport *report =
-        new BugReport(*BT_undef, BT_undef->getDescription(), N);
+      auto report =
+          llvm::make_unique<BugReport>(*BT_undef, BT_undef->getDescription(), N);
       bugreporter::trackNullOrUndefValue(N, bugreporter::getDerefExpr(S),
                                          *report);
-      C.emitReport(report);
+      C.emitReport(std::move(report));
     }
     return;
   }
diff --git a/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp b/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
index 0bcebf6..a71def2 100644
--- a/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
+++ b/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
@@ -78,9 +78,9 @@ class DirectIvarAssignment :
     void VisitBinaryOperator(const BinaryOperator *BO);
 
     void VisitChildren(const Stmt *S) {
-      for (Stmt::const_child_range I = S->children(); I; ++I)
-        if (*I)
-         this->Visit(*I);
+      for (const Stmt *Child : S->children())
+        if (Child)
+          this->Visit(Child);
     }
   };
 
diff --git a/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp b/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index e060c36..79f9479b 100644
--- a/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -39,9 +39,9 @@ void DivZeroChecker::reportBug(const char *Msg,
     if (!BT)
       BT.reset(new BuiltinBug(this, "Division by zero"));
 
-    BugReport *R = new BugReport(*BT, Msg, N);
+    auto R = llvm::make_unique<BugReport>(*BT, Msg, N);
     bugreporter::trackNullOrUndefValue(N, bugreporter::GetDenomExpr(N), *R);
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
index f36ec2c..7dc0a87 100644
--- a/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp
@@ -97,8 +97,8 @@ void ExprInspectionChecker::analyzerEval(const CallExpr *CE,
   if (!BT)
     BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
 
-  BugReport *R = new BugReport(*BT, getArgumentValueString(CE, C), N);
-  C.emitReport(R);
+  C.emitReport(
+      llvm::make_unique<BugReport>(*BT, getArgumentValueString(CE, C), N));
 }
 
 void ExprInspectionChecker::analyzerWarnIfReached(const CallExpr *CE,
@@ -108,8 +108,7 @@ void ExprInspectionChecker::analyzerWarnIfReached(const CallExpr *CE,
   if (!BT)
     BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
 
-  BugReport *R = new BugReport(*BT, "REACHABLE", N);
-  C.emitReport(R);
+  C.emitReport(llvm::make_unique<BugReport>(*BT, "REACHABLE", N));
 }
 
 void ExprInspectionChecker::analyzerCheckInlined(const CallExpr *CE,
@@ -128,8 +127,8 @@ void ExprInspectionChecker::analyzerCheckInlined(const CallExpr *CE,
   if (!BT)
     BT.reset(new BugType(this, "Checking analyzer assumptions", "debug"));
 
-  BugReport *R = new BugReport(*BT, getArgumentValueString(CE, C), N);
-  C.emitReport(R);
+  C.emitReport(
+      llvm::make_unique<BugReport>(*BT, getArgumentValueString(CE, C), N));
 }
 
 void ExprInspectionChecker::analyzerCrash(const CallExpr *CE,
diff --git a/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp b/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp
index 60bb036..48d6bd4 100644
--- a/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/FixedAddressChecker.cpp
@@ -57,9 +57,9 @@ void FixedAddressChecker::checkPreStmt(const BinaryOperator *B,
                          "Using a fixed address is not portable because that "
                          "address will probably not be valid in all "
                          "environments or platforms."));
-    BugReport *R = new BugReport(*BT, BT->getDescription(), N);
+    auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
     R->addRange(B->getRHS()->getSourceRange());
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 275481f..2cf508f 100644
--- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -642,9 +642,9 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
   // Generate diagnostic.
   if (ExplodedNode *N = C.addTransition()) {
     initBugType();
-    BugReport *report = new BugReport(*BT, Msg, N);
+    auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
     report->addRange(E->getSourceRange());
-    C.emitReport(report);
+    C.emitReport(std::move(report));
     return true;
   }
   return false;
diff --git a/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp b/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
index 02c1209..3df5fa0 100644
--- a/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/IvarInvalidationChecker.cpp
@@ -167,9 +167,9 @@ class IvarInvalidationCheckerImpl {
     void VisitObjCMessageExpr(const ObjCMessageExpr *ME);
 
     void VisitChildren(const Stmt *S) {
-      for (Stmt::const_child_range I = S->children(); I; ++I) {
-        if (*I)
-          this->Visit(*I);
+      for (const Stmt *Child : S->children()) {
+        if (Child)
+          this->Visit(Child);
         if (CalledAnotherInvalidationMethod)
           return;
       }
diff --git a/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp b/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp
index 0b7375a..4e3f9b7 100644
--- a/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/LLVMConventionsChecker.cpp
@@ -124,10 +124,9 @@ public:
                           const CheckerBase *checker)
       : DeclWithIssue(declWithIssue), BR(br), Checker(checker) {}
   void VisitChildren(Stmt *S) {
-    for (Stmt::child_iterator I = S->child_begin(), E = S->child_end() ;
-      I != E; ++I)
-      if (Stmt *child = *I)
-        Visit(child);
+    for (Stmt *Child : S->children())
+      if (Child)
+        Visit(Child);
   }
   void VisitStmt(Stmt *S) { VisitChildren(S); }
   void VisitDeclStmt(DeclStmt *DS);
diff --git a/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
index 52e2936..7838901 100644
--- a/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
@@ -103,9 +103,8 @@ private:
   const ExplodedNode *getAllocationNode(const ExplodedNode *N, SymbolRef Sym,
                                         CheckerContext &C) const;
 
-  BugReport *generateAllocatedDataNotReleasedReport(const AllocationPair &AP,
-                                                    ExplodedNode *N,
-                                                    CheckerContext &C) const;
+  std::unique_ptr<BugReport> generateAllocatedDataNotReleasedReport(
+      const AllocationPair &AP, ExplodedNode *N, CheckerContext &C) const;
 
   /// Check if RetSym evaluates to an error value in the current state.
   bool definitelyReturnedError(SymbolRef RetSym,
@@ -269,11 +268,11 @@ void MacOSKeychainAPIChecker::
 
   os << "Deallocator doesn't match the allocator: '"
      << FunctionsToTrack[PDeallocIdx].Name << "' should be used.";
-  BugReport *Report = new BugReport(*BT, os.str(), N);
+  auto Report = llvm::make_unique<BugReport>(*BT, os.str(), N);
   Report->addVisitor(llvm::make_unique<SecKeychainBugVisitor>(AP.first));
   Report->addRange(ArgExpr->getSourceRange());
-  markInteresting(Report, AP);
-  C.emitReport(Report);
+  markInteresting(Report.get(), AP);
+  C.emitReport(std::move(Report));
 }
 
 void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE,
@@ -314,11 +313,11 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE,
               << "the allocator: missing a call to '"
               << FunctionsToTrack[DIdx].Name
               << "'.";
-          BugReport *Report = new BugReport(*BT, os.str(), N);
+          auto Report = llvm::make_unique<BugReport>(*BT, os.str(), N);
           Report->addVisitor(llvm::make_unique<SecKeychainBugVisitor>(V));
           Report->addRange(ArgExpr->getSourceRange());
           Report->markInteresting(AS->Region);
-          C.emitReport(Report);
+          C.emitReport(std::move(Report));
         }
       }
     return;
@@ -370,12 +369,12 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE,
     if (!N)
       return;
     initBugType();
-    BugReport *Report = new BugReport(*BT,
-        "Trying to free data which has not been allocated.", N);
+    auto Report = llvm::make_unique<BugReport>(
+        *BT, "Trying to free data which has not been allocated.", N);
     Report->addRange(ArgExpr->getSourceRange());
     if (AS)
       Report->markInteresting(AS->Region);
-    C.emitReport(Report);
+    C.emitReport(std::move(Report));
     return;
   }
 
@@ -436,12 +435,12 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE,
     if (!N)
       return;
     initBugType();
-    BugReport *Report = new BugReport(*BT,
-        "Only call free if a valid (non-NULL) buffer was returned.", N);
+    auto Report = llvm::make_unique<BugReport>(
+        *BT, "Only call free if a valid (non-NULL) buffer was returned.", N);
     Report->addVisitor(llvm::make_unique<SecKeychainBugVisitor>(ArgSM));
     Report->addRange(ArgExpr->getSourceRange());
     Report->markInteresting(AS->Region);
-    C.emitReport(Report);
+    C.emitReport(std::move(Report));
     return;
   }
 
@@ -519,10 +518,9 @@ MacOSKeychainAPIChecker::getAllocationNode(const ExplodedNode *N,
   return AllocNode;
 }
 
-BugReport *MacOSKeychainAPIChecker::
-  generateAllocatedDataNotReleasedReport(const AllocationPair &AP,
-                                         ExplodedNode *N,
-                                         CheckerContext &C) const {
+std::unique_ptr<BugReport>
+MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport(
+    const AllocationPair &AP, ExplodedNode *N, CheckerContext &C) const {
   const ADFunctionInfo &FI = FunctionsToTrack[AP.second->AllocatorIdx];
   initBugType();
   SmallString<70> sbuf;
@@ -547,11 +545,12 @@ BugReport *MacOSKeychainAPIChecker::
                                               C.getSourceManager(),
                                               AllocNode->getLocationContext());
 
-  BugReport *Report = new BugReport(*BT, os.str(), N, LocUsedForUniqueing,
-                                   AllocNode->getLocationContext()->getDecl());
+  auto Report =
+      llvm::make_unique<BugReport>(*BT, os.str(), N, LocUsedForUniqueing,
+                                  AllocNode->getLocationContext()->getDecl());
 
   Report->addVisitor(llvm::make_unique<SecKeychainBugVisitor>(AP.first));
-  markInteresting(Report, AP);
+  markInteresting(Report.get(), AP);
   return Report;
 }
 
@@ -589,10 +588,8 @@ void MacOSKeychainAPIChecker::checkDeadSymbols(SymbolReaper &SR,
   ExplodedNode *N = C.addTransition(C.getState(), C.getPredecessor(), &Tag);
 
   // Generate the error reports.
-  for (AllocationPairVec::iterator I = Errors.begin(), E = Errors.end();
-                                                       I != E; ++I) {
-    C.emitReport(generateAllocatedDataNotReleasedReport(*I, N, C));
-  }
+  for (const auto P : Errors)
+    C.emitReport(generateAllocatedDataNotReleasedReport(P, N, C));
 
   // Generate the new, cleaned up state.
   C.addTransition(State, N);
diff --git a/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp b/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp
index 13a401d..11ba609 100644
--- a/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MacOSXAPIChecker.cpp
@@ -92,9 +92,9 @@ void MacOSXAPIChecker::CheckDispatchOnce(CheckerContext &C, const CallExpr *CE,
   if (isa<VarRegion>(R) && isa<StackLocalsSpaceRegion>(R->getMemorySpace()))
     os << "  Perhaps you intended to declare the variable as 'static'?";
 
-  BugReport *report = new BugReport(*BT_dispatchOnce, os.str(), N);
+  auto report = llvm::make_unique<BugReport>(*BT_dispatchOnce, os.str(), N);
   report->addRange(CE->getArg(0)->getSourceRange());
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 0cf0094..a9e0865 100644
--- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -1619,10 +1619,10 @@ void MallocChecker::ReportBadFree(CheckerContext &C, SVal ArgVal,
 
     printExpectedAllocName(os, C, DeallocExpr);
 
-    BugReport *R = new BugReport(*BT_BadFree[*CheckKind], os.str(), N);
+    auto R = llvm::make_unique<BugReport>(*BT_BadFree[*CheckKind], os.str(), N);
     R->markInteresting(MR);
     R->addRange(Range);
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
@@ -1643,11 +1643,12 @@ void MallocChecker::ReportFreeAlloca(CheckerContext &C, SVal ArgVal,
       BT_FreeAlloca[*CheckKind].reset(
           new BugType(CheckNames[*CheckKind], "Free alloca()", "Memory Error"));
 
-    BugReport *R = new BugReport(*BT_FreeAlloca[*CheckKind], 
-                 "Memory allocated by alloca() should not be deallocated", N);
+    auto R = llvm::make_unique<BugReport>(
+        *BT_FreeAlloca[*CheckKind],
+        "Memory allocated by alloca() should not be deallocated", N);
     R->markInteresting(ArgVal.getAsRegion());
     R->addRange(Range);
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
@@ -1698,11 +1699,11 @@ void MallocChecker::ReportMismatchedDealloc(CheckerContext &C,
         os << ", not " << DeallocOs.str();
     }
 
-    BugReport *R = new BugReport(*BT_MismatchedDealloc, os.str(), N);
+    auto R = llvm::make_unique<BugReport>(*BT_MismatchedDealloc, os.str(), N);
     R->markInteresting(Sym);
     R->addRange(Range);
     R->addVisitor(llvm::make_unique<MallocBugVisitor>(Sym));
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
@@ -1757,10 +1758,10 @@ void MallocChecker::ReportOffsetFree(CheckerContext &C, SVal ArgVal,
   else
     os << "allocated memory";
 
-  BugReport *R = new BugReport(*BT_OffsetFree[*CheckKind], os.str(), N);
+  auto R = llvm::make_unique<BugReport>(*BT_OffsetFree[*CheckKind], os.str(), N);
   R->markInteresting(MR->getBaseRegion());
   R->addRange(Range);
-  C.emitReport(R);
+  C.emitReport(std::move(R));
 }
 
 void MallocChecker::ReportUseAfterFree(CheckerContext &C, SourceRange Range,
@@ -1779,13 +1780,13 @@ void MallocChecker::ReportUseAfterFree(CheckerContext &C, SourceRange Range,
       BT_UseFree[*CheckKind].reset(new BugType(
           CheckNames[*CheckKind], "Use-after-free", "Memory Error"));
 
-    BugReport *R = new BugReport(*BT_UseFree[*CheckKind],
-                                 "Use of memory after it is freed", N);
+    auto R = llvm::make_unique<BugReport>(*BT_UseFree[*CheckKind],
+                                         "Use of memory after it is freed", N);
 
     R->markInteresting(Sym);
     R->addRange(Range);
     R->addVisitor(llvm::make_unique<MallocBugVisitor>(Sym));
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
@@ -1806,17 +1807,17 @@ void MallocChecker::ReportDoubleFree(CheckerContext &C, SourceRange Range,
       BT_DoubleFree[*CheckKind].reset(
           new BugType(CheckNames[*CheckKind], "Double free", "Memory Error"));
 
-    BugReport *R =
-        new BugReport(*BT_DoubleFree[*CheckKind],
-                      (Released ? "Attempt to free released memory"
-                                : "Attempt to free non-owned memory"),
-                      N);
+    auto R = llvm::make_unique<BugReport>(
+        *BT_DoubleFree[*CheckKind],
+        (Released ? "Attempt to free released memory"
+                  : "Attempt to free non-owned memory"),
+        N);
     R->addRange(Range);
     R->markInteresting(Sym);
     if (PrevSym)
       R->markInteresting(PrevSym);
     R->addVisitor(llvm::make_unique<MallocBugVisitor>(Sym));
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
@@ -1834,12 +1835,12 @@ void MallocChecker::ReportDoubleDelete(CheckerContext &C, SymbolRef Sym) const {
       BT_DoubleDelete.reset(new BugType(CheckNames[CK_NewDeleteChecker],
                                         "Double delete", "Memory Error"));
 
-    BugReport *R = new BugReport(*BT_DoubleDelete,
-                                 "Attempt to delete released memory", N);
+    auto R = llvm::make_unique<BugReport>(
+        *BT_DoubleDelete, "Attempt to delete released memory", N);
 
     R->markInteresting(Sym);
     R->addVisitor(llvm::make_unique<MallocBugVisitor>(Sym));
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
@@ -1861,15 +1862,15 @@ void MallocChecker::ReportUseZeroAllocated(CheckerContext &C,
       BT_UseZerroAllocated[*CheckKind].reset(new BugType(
           CheckNames[*CheckKind], "Use of zero allocated", "Memory Error"));
 
-    BugReport *R = new BugReport(*BT_UseZerroAllocated[*CheckKind],
-                                 "Use of zero-allocated memory", N);
+    auto R = llvm::make_unique<BugReport>(*BT_UseZerroAllocated[*CheckKind],
+                                         "Use of zero-allocated memory", N);
 
     R->addRange(Range);
     if (Sym) {
       R->markInteresting(Sym);
       R->addVisitor(llvm::make_unique<MallocBugVisitor>(Sym));
     }
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
@@ -2099,12 +2100,12 @@ void MallocChecker::reportLeak(SymbolRef Sym, ExplodedNode *N,
     os << "Potential memory leak";
   }
 
-  BugReport *R =
-      new BugReport(*BT_Leak[*CheckKind], os.str(), N, LocUsedForUniqueing,
-                    AllocNode->getLocationContext()->getDecl());
+  auto R = llvm::make_unique<BugReport>(
+      *BT_Leak[*CheckKind], os.str(), N, LocUsedForUniqueing,
+      AllocNode->getLocationContext()->getDecl());
   R->markInteresting(Sym);
   R->addVisitor(llvm::make_unique<MallocBugVisitor>(Sym, true));
-  C.emitReport(R);
+  C.emitReport(std::move(R));
 }
 
 void MallocChecker::checkDeadSymbols(SymbolReaper &SymReaper,
diff --git a/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp
index 296aec6..fb07484 100644
--- a/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MallocSizeofChecker.cpp
@@ -65,10 +65,9 @@ public:
   }
 
   void VisitChildren(const Stmt *S) {
-    for (Stmt::const_child_iterator I = S->child_begin(), E = S->child_end();
-         I!=E; ++I)
-      if (const Stmt *child = *I)
-        VisitChild(S, child);
+    for (const Stmt *Child : S->children())
+      if (Child)
+        VisitChild(S, Child);
   }
 
   TypeCallPair VisitCastExpr(const CastExpr *E) {
diff --git a/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp b/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp
index b180c03..d23708e 100644
--- a/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NSAutoreleasePoolChecker.cpp
@@ -68,10 +68,11 @@ void NSAutoreleasePoolChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
     return;
   }
 
-  BugReport *Report = new BugReport(*BT, "Use -drain instead of -release when "
-    "using NSAutoreleasePool and garbage collection", N);
+  auto Report = llvm::make_unique<BugReport>(
+      *BT, "Use -drain instead of -release when using NSAutoreleasePool and "
+           "garbage collection", N);
   Report->addRange(msg.getSourceRange());
-  C.emitReport(Report);
+  C.emitReport(std::move(Report));
 }
 
 void ento::registerNSAutoreleasePoolChecker(CheckerManager &mgr) {
diff --git a/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp b/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
index 2be7f1d..c351c6e 100644
--- a/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
@@ -275,8 +275,7 @@ void NSOrCFErrorDerefChecker::checkEvent(ImplicitNullDerefEvent event) const {
       CFBT.reset(new CFErrorDerefBug(this));
     bug = CFBT.get();
   }
-  BugReport *report = new BugReport(*bug, os.str(), event.SinkNode);
-  BR.emitReport(report);
+  BR.emitReport(llvm::make_unique<BugReport>(*bug, os.str(), event.SinkNode));
 }
 
 static bool IsNSError(QualType T, IdentifierInfo *II) {
diff --git a/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
index cb2d46b..73f8087 100644
--- a/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
@@ -36,10 +36,11 @@ public:
 
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
 
-  BugReport *genReportNullAttrNonNull(const ExplodedNode *ErrorN,
-                                      const Expr *ArgE) const;
-  BugReport *genReportReferenceToNullPointer(const ExplodedNode *ErrorN,
-                                             const Expr *ArgE) const;
+  std::unique_ptr<BugReport>
+  genReportNullAttrNonNull(const ExplodedNode *ErrorN, const Expr *ArgE) const;
+  std::unique_ptr<BugReport>
+  genReportReferenceToNullPointer(const ExplodedNode *ErrorN,
+                                  const Expr *ArgE) const;
 };
 } // end anonymous namespace
 
@@ -143,7 +144,7 @@ void NonNullParamChecker::checkPreCall(const CallEvent &Call,
       // we cache out.
       if (ExplodedNode *errorNode = C.generateSink(stateNull)) {
 
-        BugReport *R = nullptr;
+        std::unique_ptr<BugReport> R;
         if (haveAttrNonNull)
           R = genReportNullAttrNonNull(errorNode, ArgE);
         else if (haveRefTypeParam)
@@ -153,7 +154,7 @@ void NonNullParamChecker::checkPreCall(const CallEvent &Call,
         R->addRange(Call.getArgSourceRange(idx));
 
         // Emit the bug report.
-        C.emitReport(R);
+        C.emitReport(std::move(R));
       }
 
       // Always return.  Either we cached out or we just emitted an error.
@@ -171,8 +172,9 @@ void NonNullParamChecker::checkPreCall(const CallEvent &Call,
   C.addTransition(state);
 }
 
-BugReport *NonNullParamChecker::genReportNullAttrNonNull(
-  const ExplodedNode *ErrorNode, const Expr *ArgE) const {
+std::unique_ptr<BugReport>
+NonNullParamChecker::genReportNullAttrNonNull(const ExplodedNode *ErrorNode,
+                                              const Expr *ArgE) const {
   // Lazily allocate the BugType object if it hasn't already been
   // created. Ownership is transferred to the BugReporter object once
   // the BugReport is passed to 'EmitWarning'.
@@ -180,23 +182,22 @@ BugReport *NonNullParamChecker::genReportNullAttrNonNull(
     BTAttrNonNull.reset(new BugType(
         this, "Argument with 'nonnull' attribute passed null", "API"));
 
-  BugReport *R = new BugReport(*BTAttrNonNull,
-                  "Null pointer passed as an argument to a 'nonnull' parameter",
-                  ErrorNode);
+  auto R = llvm::make_unique<BugReport>(
+      *BTAttrNonNull,
+      "Null pointer passed as an argument to a 'nonnull' parameter", ErrorNode);
   if (ArgE)
     bugreporter::trackNullOrUndefValue(ErrorNode, ArgE, *R);
 
   return R;
 }
 
-BugReport *NonNullParamChecker::genReportReferenceToNullPointer(
-  const ExplodedNode *ErrorNode, const Expr *ArgE) const {
+std::unique_ptr<BugReport> NonNullParamChecker::genReportReferenceToNullPointer(
+    const ExplodedNode *ErrorNode, const Expr *ArgE) const {
   if (!BTNullRefArg)
     BTNullRefArg.reset(new BuiltinBug(this, "Dereference of null pointer"));
 
-  BugReport *R = new BugReport(*BTNullRefArg,
-                               "Forming reference to null pointer",
-                               ErrorNode);
+  auto R = llvm::make_unique<BugReport>(
+      *BTNullRefArg, "Forming reference to null pointer", ErrorNode);
   if (ArgE) {
     const Expr *ArgEDeref = bugreporter::getDerefExpr(ArgE);
     if (!ArgEDeref)
diff --git a/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
index fbf2d73..a7b92b4 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp
@@ -47,10 +47,10 @@ void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S,
       if (!BT_undef)
         BT_undef.reset(new BuiltinBug(this, "Uninitialized value used as mutex "
                                             "for @synchronized"));
-      BugReport *report =
-        new BugReport(*BT_undef, BT_undef->getDescription(), N);
+      auto report =
+          llvm::make_unique<BugReport>(*BT_undef, BT_undef->getDescription(), N);
       bugreporter::trackNullOrUndefValue(N, Ex, *report);
-      C.emitReport(report);
+      C.emitReport(std::move(report));
     }
     return;
   }
@@ -71,11 +71,11 @@ void ObjCAtSyncChecker::checkPreStmt(const ObjCAtSynchronizedStmt *S,
           BT_null.reset(new BuiltinBug(
               this, "Nil value used as mutex for @synchronized() "
                     "(no synchronization will occur)"));
-        BugReport *report =
-          new BugReport(*BT_null, BT_null->getDescription(), N);
+        auto report =
+            llvm::make_unique<BugReport>(*BT_null, BT_null->getDescription(), N);
         bugreporter::trackNullOrUndefValue(N, Ex, *report);
 
-        C.emitReport(report);
+        C.emitReport(std::move(report));
         return;
       }
     }
diff --git a/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp
index e3fc611..224251b 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCContainersASTChecker.cpp
@@ -153,9 +153,9 @@ void WalkAST::VisitCallExpr(CallExpr *CE) {
 }
 
 void WalkAST::VisitChildren(Stmt *S) {
-  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I)
-    if (Stmt *child = *I)
-      Visit(child);
+  for (Stmt *Child : S->children())
+    if (Child)
+      Visit(Child);
 }
 
 namespace {
diff --git a/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
index 4f0b7e5..53e1598 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
@@ -143,9 +143,9 @@ void ObjCContainersChecker::checkPreStmt(const CallExpr *CE,
       if (!N)
         return;
       initBugType();
-      BugReport *R = new BugReport(*BT, "Index is out of bounds", N);
+      auto R = llvm::make_unique<BugReport>(*BT, "Index is out of bounds", N);
       R->addRange(IdxExpr->getSourceRange());
-      C.emitReport(R);
+      C.emitReport(std::move(R));
       return;
     }
   }
diff --git a/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
index 51bc7e6..93b0553 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCSelfInitChecker.cpp
@@ -160,8 +160,7 @@ void ObjCSelfInitChecker::checkForInvalidSelf(const Expr *E, CheckerContext &C,
   if (!BT)
     BT.reset(new BugType(this, "Missing \"self = [(super or self) init...]\"",
                          categories::CoreFoundationObjectiveC));
-  BugReport *report = new BugReport(*BT, errorStr, N);
-  C.emitReport(report);
+  C.emitReport(llvm::make_unique<BugReport>(*BT, errorStr, N));
 }
 
 void ObjCSelfInitChecker::checkPostObjCMessage(const ObjCMethodCall &Msg,
diff --git a/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp
index d3b1753..c6da37e 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCUnusedIVarsChecker.cpp
@@ -57,8 +57,8 @@ static void Scan(IvarUsageMap& M, const Stmt *S) {
       Scan(M, sub);
     }
 
-  for (Stmt::const_child_iterator I=S->child_begin(),E=S->child_end(); I!=E;++I)
-    Scan(M, *I);
+  for (const Stmt *SubStmt : S->children())
+    Scan(M, SubStmt);
 }
 
 static void Scan(IvarUsageMap& M, const ObjCPropertyImplDecl *D) {
diff --git a/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp b/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
index 00480e4..8063124 100644
--- a/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
@@ -58,9 +58,9 @@ void PointerArithChecker::checkPreStmt(const BinaryOperator *B,
                            "Pointer arithmetic done on non-array variables "
                            "means reliance on memory layout, which is "
                            "dangerous."));
-      BugReport *R = new BugReport(*BT, BT->getDescription(), N);
+      auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
       R->addRange(B->getSourceRange());
-      C.emitReport(R);
+      C.emitReport(std::move(R));
     }
   }
 }
diff --git a/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp b/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp
index fbb2628..cf1f88a 100644
--- a/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/PointerSubChecker.cpp
@@ -66,9 +66,9 @@ void PointerSubChecker::checkPreStmt(const BinaryOperator *B,
           new BuiltinBug(this, "Pointer subtraction",
                          "Subtraction of two pointers that do not point to "
                          "the same memory chunk may cause incorrect result."));
-    BugReport *R = new BugReport(*BT, BT->getDescription(), N);
+    auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
     R->addRange(B->getSourceRange());
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
index 1ede3a2..4209017 100644
--- a/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
@@ -145,11 +145,10 @@ void PthreadLockChecker::AcquireLock(CheckerContext &C, const CallExpr *CE,
       ExplodedNode *N = C.generateSink();
       if (!N)
         return;
-      BugReport *report = new BugReport(*BT_doublelock,
-                                        "This lock has already been acquired",
-                                        N);
+      auto report = llvm::make_unique<BugReport>(
+          *BT_doublelock, "This lock has already been acquired", N);
       report->addRange(CE->getArg(0)->getSourceRange());
-      C.emitReport(report);
+      C.emitReport(std::move(report));
       return;
     } else if (LState->isDestroyed()) {
       reportUseDestroyedBug(C, CE);
@@ -208,11 +207,10 @@ void PthreadLockChecker::ReleaseLock(CheckerContext &C, const CallExpr *CE,
       ExplodedNode *N = C.generateSink();
       if (!N)
         return;
-      BugReport *Report = new BugReport(*BT_doubleunlock,
-                                        "This lock has already been unlocked",
-                                        N);
+      auto Report = llvm::make_unique<BugReport>(
+          *BT_doubleunlock, "This lock has already been unlocked", N);
       Report->addRange(CE->getArg(0)->getSourceRange());
-      C.emitReport(Report);
+      C.emitReport(std::move(Report));
       return;
     } else if (LState->isDestroyed()) {
       reportUseDestroyedBug(C, CE);
@@ -232,13 +230,11 @@ void PthreadLockChecker::ReleaseLock(CheckerContext &C, const CallExpr *CE,
       ExplodedNode *N = C.generateSink();
       if (!N)
         return;
-      BugReport *report = new BugReport(*BT_lor,
-                                        "This was not the most recently "
-                                        "acquired lock. Possible lock order "
-                                        "reversal",
-                                        N);
+      auto report = llvm::make_unique<BugReport>(
+          *BT_lor, "This was not the most recently acquired lock. Possible "
+                   "lock order reversal", N);
       report->addRange(CE->getArg(0)->getSourceRange());
-      C.emitReport(report);
+      C.emitReport(std::move(report));
       return;
     }
     // Record that the lock was released.
@@ -279,9 +275,9 @@ void PthreadLockChecker::DestroyLock(CheckerContext &C, const CallExpr *CE,
   ExplodedNode *N = C.generateSink();
   if (!N)
     return;
-  BugReport *Report = new BugReport(*BT_destroylock, Message, N);
+  auto Report = llvm::make_unique<BugReport>(*BT_destroylock, Message, N);
   Report->addRange(CE->getArg(0)->getSourceRange());
-  C.emitReport(Report);
+  C.emitReport(std::move(Report));
 }
 
 void PthreadLockChecker::InitLock(CheckerContext &C, const CallExpr *CE,
@@ -314,9 +310,9 @@ void PthreadLockChecker::InitLock(CheckerContext &C, const CallExpr *CE,
   ExplodedNode *N = C.generateSink();
   if (!N)
     return;
-  BugReport *Report = new BugReport(*BT_initlock, Message, N);
+  auto Report = llvm::make_unique<BugReport>(*BT_initlock, Message, N);
   Report->addRange(CE->getArg(0)->getSourceRange());
-  C.emitReport(Report);
+  C.emitReport(std::move(Report));
 }
 
 void PthreadLockChecker::reportUseDestroyedBug(CheckerContext &C,
@@ -327,11 +323,10 @@ void PthreadLockChecker::reportUseDestroyedBug(CheckerContext &C,
   ExplodedNode *N = C.generateSink();
   if (!N)
     return;
-  BugReport *Report = new BugReport(*BT_destroylock,
-                                    "This lock has already been destroyed",
-                                    N);
+  auto Report = llvm::make_unique<BugReport>(
+      *BT_destroylock, "This lock has already been destroyed", N);
   Report->addRange(CE->getArg(0)->getSourceRange());
-  C.emitReport(Report);
+  C.emitReport(std::move(Report));
 }
 
 void ento::registerPthreadLockChecker(CheckerManager &mgr) {
diff --git a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
index 49eef23..6ee87a5 100644
--- a/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp
@@ -2182,9 +2182,8 @@ PathDiagnosticPiece *CFRefReportVisitor::VisitNode(const ExplodedNode *N,
 
   // Add the range by scanning the children of the statement for any bindings
   // to Sym.
-  for (Stmt::const_child_iterator I = S->child_begin(), E = S->child_end();
-       I!=E; ++I)
-    if (const Expr *Exp = dyn_cast_or_null<Expr>(*I))
+  for (const Stmt *Child : S->children())
+    if (const Expr *Exp = dyn_cast_or_null<Expr>(Child))
       if (CurrSt->getSValAsScalarOrLoc(Exp, LCtx).getAsLocSymbol() == Sym) {
         P->addRange(Exp->getSourceRange());
         break;
@@ -2779,16 +2778,14 @@ void RetainCountChecker::processObjCLiterals(CheckerContext &C,
                                              const Expr *Ex) const {
   ProgramStateRef state = C.getState();
   const ExplodedNode *pred = C.getPredecessor();  
-  for (Stmt::const_child_iterator it = Ex->child_begin(), et = Ex->child_end() ;
-       it != et ; ++it) {
-    const Stmt *child = *it;
-    SVal V = state->getSVal(child, pred->getLocationContext());
+  for (const Stmt *Child : Ex->children()) {
+    SVal V = state->getSVal(Child, pred->getLocationContext());
     if (SymbolRef sym = V.getAsSymbol())
       if (const RefVal* T = getRefBinding(state, sym)) {
         RefVal::Kind hasErr = (RefVal::Kind) 0;
         state = updateSymbol(state, sym, *T, MayEscape, hasErr, C);
         if (hasErr) {
-          processNonLeakError(state, child->getSourceRange(), hasErr, sym, C);
+          processNonLeakError(state, Child->getSourceRange(), hasErr, sym, C);
           return;
         }
       }
@@ -3320,31 +3317,31 @@ void RetainCountChecker::processNonLeakError(ProgramStateRef St,
     case RefVal::ErrorUseAfterRelease:
       if (!useAfterRelease)
         useAfterRelease.reset(new UseAfterRelease(this));
-      BT = &*useAfterRelease;
+      BT = useAfterRelease.get();
       break;
     case RefVal::ErrorReleaseNotOwned:
       if (!releaseNotOwned)
         releaseNotOwned.reset(new BadRelease(this));
-      BT = &*releaseNotOwned;
+      BT = releaseNotOwned.get();
       break;
     case RefVal::ErrorDeallocGC:
       if (!deallocGC)
         deallocGC.reset(new DeallocGC(this));
-      BT = &*deallocGC;
+      BT = deallocGC.get();
       break;
     case RefVal::ErrorDeallocNotOwned:
       if (!deallocNotOwned)
         deallocNotOwned.reset(new DeallocNotOwned(this));
-      BT = &*deallocNotOwned;
+      BT = deallocNotOwned.get();
       break;
   }
 
   assert(BT);
-  CFRefReport *report = new CFRefReport(*BT, C.getASTContext().getLangOpts(),
-                                        C.isObjCGCEnabled(), SummaryLog,
-                                        N, Sym);
+  auto report = std::unique_ptr<BugReport>(
+      new CFRefReport(*BT, C.getASTContext().getLangOpts(), C.isObjCGCEnabled(),
+                      SummaryLog, N, Sym));
   report->addRange(ErrorRange);
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 }
 
 //===----------------------------------------------------------------------===//
@@ -3573,12 +3570,9 @@ void RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S,
         if (N) {
           const LangOptions &LOpts = C.getASTContext().getLangOpts();
           bool GCEnabled = C.isObjCGCEnabled();
-          CFRefReport *report =
-            new CFRefLeakReport(*getLeakAtReturnBug(LOpts, GCEnabled),
-                                LOpts, GCEnabled, SummaryLog,
-                                N, Sym, C, IncludeAllocationLine);
-
-          C.emitReport(report);
+          C.emitReport(std::unique_ptr<BugReport>(new CFRefLeakReport(
+              *getLeakAtReturnBug(LOpts, GCEnabled), LOpts, GCEnabled,
+              SummaryLog, N, Sym, C, IncludeAllocationLine)));
         }
       }
     }
@@ -3603,11 +3597,9 @@ void RetainCountChecker::checkReturnWithRetEffect(const ReturnStmt *S,
           if (!returnNotOwnedForOwned)
             returnNotOwnedForOwned.reset(new ReturnedNotOwnedForOwned(this));
 
-          CFRefReport *report =
-              new CFRefReport(*returnNotOwnedForOwned,
-                              C.getASTContext().getLangOpts(), 
-                              C.isObjCGCEnabled(), SummaryLog, N, Sym);
-          C.emitReport(report);
+          C.emitReport(std::unique_ptr<BugReport>(new CFRefReport(
+              *returnNotOwnedForOwned, C.getASTContext().getLangOpts(),
+              C.isObjCGCEnabled(), SummaryLog, N, Sym)));
         }
       }
     }
@@ -3810,10 +3802,9 @@ RetainCountChecker::handleAutoreleaseCounts(ProgramStateRef state,
       overAutorelease.reset(new OverAutorelease(this));
 
     const LangOptions &LOpts = Ctx.getASTContext().getLangOpts();
-    CFRefReport *report =
-      new CFRefReport(*overAutorelease, LOpts, /* GCEnabled = */ false,
-                      SummaryLog, N, Sym, os.str());
-    Ctx.emitReport(report);
+    Ctx.emitReport(std::unique_ptr<BugReport>(
+        new CFRefReport(*overAutorelease, LOpts, /* GCEnabled = */ false,
+                        SummaryLog, N, Sym, os.str())));
   }
 
   return nullptr;
@@ -3865,10 +3856,9 @@ RetainCountChecker::processLeaks(ProgramStateRef state,
                           : getLeakAtReturnBug(LOpts, GCEnabled);
       assert(BT && "BugType not initialized.");
 
-      CFRefLeakReport *report = new CFRefLeakReport(*BT, LOpts, GCEnabled, 
-                                                    SummaryLog, N, *I, Ctx,
-                                                    IncludeAllocationLine);
-      Ctx.emitReport(report);
+      Ctx.emitReport(std::unique_ptr<BugReport>(
+          new CFRefLeakReport(*BT, LOpts, GCEnabled, SummaryLog, N, *I, Ctx,
+                              IncludeAllocationLine)));
     }
   }
 
diff --git a/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp b/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
index b1cde6b..acbd0d9 100644
--- a/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
@@ -80,11 +80,10 @@ void ReturnPointerRangeChecker::checkPreStmt(const ReturnStmt *RS,
     // reference is outside the range.
 
     // Generate a report for this bug.
-    BugReport *report = 
-      new BugReport(*BT, BT->getDescription(), N);
+    auto report = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
 
     report->addRange(RetE->getSourceRange());
-    C.emitReport(report);
+    C.emitReport(std::move(report));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp b/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp
index 6622313..2668ac1 100644
--- a/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp
@@ -84,12 +84,12 @@ static void emitBug(CheckerContext &C, BuiltinBug &BT, const Expr *RetE,
   if (!N)
     return;
 
-  BugReport *Report = new BugReport(BT, BT.getDescription(), N);
+  auto Report = llvm::make_unique<BugReport>(BT, BT.getDescription(), N);
 
   Report->addRange(RetE->getSourceRange());
   bugreporter::trackNullOrUndefValue(N, TrackingE ? TrackingE : RetE, *Report);
 
-  C.emitReport(Report);
+  C.emitReport(std::move(Report));
 }
 
 void ReturnUndefChecker::emitUndef(CheckerContext &C, const Expr *RetE) const {
diff --git a/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp b/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
index ccf816c..c22e78b 100644
--- a/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
@@ -214,11 +214,11 @@ void SimpleStreamChecker::reportDoubleClose(SymbolRef FileDescSym,
     return;
 
   // Generate the report.
-  BugReport *R = new BugReport(*DoubleCloseBugType,
+  auto R = llvm::make_unique<BugReport>(*DoubleCloseBugType,
       "Closing a previously closed file stream", ErrNode);
   R->addRange(Call.getSourceRange());
   R->markInteresting(FileDescSym);
-  C.emitReport(R);
+  C.emitReport(std::move(R));
 }
 
 void SimpleStreamChecker::reportLeaks(ArrayRef<SymbolRef> LeakedStreams,
@@ -227,10 +227,10 @@ void SimpleStreamChecker::reportLeaks(ArrayRef<SymbolRef> LeakedStreams,
   // Attach bug reports to the leak node.
   // TODO: Identify the leaked file descriptor.
   for (SymbolRef LeakedStream : LeakedStreams) {
-    BugReport *R = new BugReport(*LeakBugType,
+    auto R = llvm::make_unique<BugReport>(*LeakBugType,
         "Opened file is never closed; potential resource leak", ErrNode);
     R->markInteresting(LeakedStream);
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
index 327a9e0..813c811 100644
--- a/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
@@ -108,12 +108,12 @@ void StackAddrEscapeChecker::EmitStackError(CheckerContext &C, const MemRegion *
   llvm::raw_svector_ostream os(buf);
   SourceRange range = genName(os, R, C.getASTContext());
   os << " returned to caller";
-  BugReport *report = new BugReport(*BT_returnstack, os.str(), N);
+  auto report = llvm::make_unique<BugReport>(*BT_returnstack, os.str(), N);
   report->addRange(RetE->getSourceRange());
   if (range.isValid())
     report->addRange(range);
 
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 }
 
 void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS,
@@ -231,11 +231,11 @@ void StackAddrEscapeChecker::checkEndFunction(CheckerContext &Ctx) const {
     const VarRegion *VR = cast<VarRegion>(cb.V[i].first->getBaseRegion());
     os << *VR->getDecl()
        << "' upon returning to the caller.  This will be a dangling reference";
-    BugReport *report = new BugReport(*BT_stackleak, os.str(), N);
+    auto report = llvm::make_unique<BugReport>(*BT_stackleak, os.str(), N);
     if (range.isValid())
       report->addRange(range);
 
-    Ctx.emitReport(report);
+    Ctx.emitReport(std::move(report));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 894765a..2109a75 100644
--- a/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -277,9 +277,8 @@ void StreamChecker::Fseek(CheckerContext &C, const CallExpr *CE) const {
           new BuiltinBug(this, "Illegal whence argument",
                          "The whence argument to fseek() should be "
                          "SEEK_SET, SEEK_END, or SEEK_CUR."));
-    BugReport *R = new BugReport(*BT_illegalwhence, 
-				 BT_illegalwhence->getDescription(), N);
-    C.emitReport(R);
+    C.emitReport(llvm::make_unique<BugReport>(
+        *BT_illegalwhence, BT_illegalwhence->getDescription(), N));
   }
 }
 
@@ -354,8 +353,8 @@ ProgramStateRef StreamChecker::CheckNullStream(SVal SV, ProgramStateRef state,
       if (!BT_nullfp)
         BT_nullfp.reset(new BuiltinBug(this, "NULL stream pointer",
                                        "Stream pointer might be NULL."));
-      BugReport *R =new BugReport(*BT_nullfp, BT_nullfp->getDescription(), N);
-      C.emitReport(R);
+      C.emitReport(llvm::make_unique<BugReport>(
+          *BT_nullfp, BT_nullfp->getDescription(), N));
     }
     return nullptr;
   }
@@ -385,9 +384,8 @@ ProgramStateRef StreamChecker::CheckDoubleClose(const CallExpr *CE,
         BT_doubleclose.reset(new BuiltinBug(
             this, "Double fclose", "Try to close a file Descriptor already"
                                    " closed. Cause undefined behaviour."));
-      BugReport *R = new BugReport(*BT_doubleclose,
-                                   BT_doubleclose->getDescription(), N);
-      C.emitReport(R);
+      C.emitReport(llvm::make_unique<BugReport>(
+          *BT_doubleclose, BT_doubleclose->getDescription(), N));
     }
     return nullptr;
   }
@@ -414,9 +412,8 @@ void StreamChecker::checkDeadSymbols(SymbolReaper &SymReaper,
           BT_ResourceLeak.reset(new BuiltinBug(
               this, "Resource Leak",
               "Opened File never closed. Potential Resource leak."));
-        BugReport *R = new BugReport(*BT_ResourceLeak, 
-                                     BT_ResourceLeak->getDescription(), N);
-        C.emitReport(R);
+        C.emitReport(llvm::make_unique<BugReport>(
+            *BT_ResourceLeak, BT_ResourceLeak->getDescription(), N));
       }
     }
   }
diff --git a/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp b/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
index d33c977..6e24775 100644
--- a/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/TaintTesterChecker.cpp
@@ -50,9 +50,9 @@ void TaintTesterChecker::checkPostStmt(const Expr *E,
   if (State->isTainted(E, C.getLocationContext())) {
     if (ExplodedNode *N = C.addTransition()) {
       initBugType();
-      BugReport *report = new BugReport(*BT, "tainted",N);
+      auto report = llvm::make_unique<BugReport>(*BT, "tainted",N);
       report->addRange(E->getSourceRange());
-      C.emitReport(report);
+      C.emitReport(std::move(report));
     }
   }
 }
diff --git a/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp b/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
index 083075d..638701d 100644
--- a/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/TestAfterDivZeroChecker.cpp
@@ -171,14 +171,14 @@ void TestAfterDivZeroChecker::reportBug(SVal Val, CheckerContext &C) const {
     if (!DivZeroBug)
       DivZeroBug.reset(new BuiltinBug(this, "Division by zero"));
 
-    BugReport *R =
-        new BugReport(*DivZeroBug, "Value being compared against zero has "
-                                   "already been used for division",
-                      N);
+    auto R = llvm::make_unique<BugReport>(
+        *DivZeroBug, "Value being compared against zero has already been used "
+                     "for division",
+        N);
 
     R->addVisitor(llvm::make_unique<DivisionBRVisitor>(Val.getAsSymbol(),
                                                        C.getStackFrame()));
-    C.emitReport(R);
+    C.emitReport(std::move(R));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
index fc49a46..1d8ef99 100644
--- a/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp
@@ -37,12 +37,10 @@ class UndefBranchChecker : public Checker<check::BranchCondition> {
       if (!MatchesCriteria(Ex))
         return nullptr;
 
-      for (Stmt::const_child_iterator I = Ex->child_begin(), 
-                                      E = Ex->child_end();I!=E;++I)
-        if (const Expr *ExI = dyn_cast_or_null<Expr>(*I)) {
-          const Expr *E2 = FindExpr(ExI);
-          if (E2) return E2;
-        }
+      for (const Stmt *SubStmt : Ex->children())
+        if (const Expr *ExI = dyn_cast_or_null<Expr>(SubStmt))
+          if (const Expr *E2 = FindExpr(ExI))
+            return E2;
 
       return Ex;
     }
@@ -98,11 +96,11 @@ void UndefBranchChecker::checkBranchCondition(const Stmt *Condition,
       Ex = FindIt.FindExpr(Ex);
 
       // Emit the bug report.
-      BugReport *R = new BugReport(*BT, BT->getDescription(), N);
+      auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
       bugreporter::trackNullOrUndefValue(N, Ex, *R);
       R->addRange(Ex->getSourceRange());
 
-      Ctx.emitReport(R);
+      Ctx.emitReport(std::move(R));
     }
   }
 }
diff --git a/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
index 8976e0a..53fd069 100644
--- a/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
@@ -40,13 +40,10 @@ static const DeclRefExpr *FindBlockDeclRefExpr(const Stmt *S,
     if (BR->getDecl() == VD)
       return BR;
 
-  for (Stmt::const_child_iterator I = S->child_begin(), E = S->child_end();
-       I!=E; ++I)
-    if (const Stmt *child = *I) {
-      const DeclRefExpr *BR = FindBlockDeclRefExpr(child, VD);
-      if (BR)
+  for (const Stmt *Child : S->children())
+    if (Child)
+      if (const DeclRefExpr *BR = FindBlockDeclRefExpr(Child, VD))
         return BR;
-    }
 
   return nullptr;
 }
@@ -89,14 +86,14 @@ UndefCapturedBlockVarChecker::checkPostStmt(const BlockExpr *BE,
         os << "Variable '" << VD->getName() 
            << "' is uninitialized when captured by block";
 
-        BugReport *R = new BugReport(*BT, os.str(), N);
+        auto R = llvm::make_unique<BugReport>(*BT, os.str(), N);
         if (const Expr *Ex = FindBlockDeclRefExpr(BE->getBody(), VD))
           R->addRange(Ex->getSourceRange());
         R->addVisitor(llvm::make_unique<FindLastStoreBRVisitor>(
             *V, VR, /*EnableNullFPSuppression*/ false));
         R->disablePathPruning();
         // need location of block
-        C.emitReport(R);
+        C.emitReport(std::move(R));
       }
     }
   }
diff --git a/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp
index f3f4dce..5353310 100644
--- a/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp
@@ -84,7 +84,7 @@ void UndefResultChecker::checkPostStmt(const BinaryOperator *B,
          << BinaryOperator::getOpcodeStr(B->getOpcode())
          << "' expression is undefined";
     }
-    BugReport *report = new BugReport(*BT, OS.str(), N);
+    auto report = llvm::make_unique<BugReport>(*BT, OS.str(), N);
     if (Ex) {
       report->addRange(Ex->getSourceRange());
       bugreporter::trackNullOrUndefValue(N, Ex, *report);
@@ -92,7 +92,7 @@ void UndefResultChecker::checkPostStmt(const BinaryOperator *B,
     else
       bugreporter::trackNullOrUndefValue(N, B, *report);
     
-    C.emitReport(report);
+    C.emitReport(std::move(report));
   }
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp
index e952671..ba4daf8 100644
--- a/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefinedArraySubscriptChecker.cpp
@@ -53,10 +53,10 @@ UndefinedArraySubscriptChecker::checkPreStmt(const ArraySubscriptExpr *A,
     BT.reset(new BuiltinBug(this, "Array subscript is undefined"));
 
   // Generate a report for this bug.
-  BugReport *R = new BugReport(*BT, BT->getName(), N);
+  auto R = llvm::make_unique<BugReport>(*BT, BT->getName(), N);
   R->addRange(A->getIdx()->getSourceRange());
   bugreporter::trackNullOrUndefValue(N, A->getIdx(), *R);
-  C.emitReport(R);
+  C.emitReport(std::move(R));
 }
 
 void ento::registerUndefinedArraySubscriptChecker(CheckerManager &mgr) {
diff --git a/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp b/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
index bd4493d..81c96c4 100644
--- a/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UndefinedAssignmentChecker.cpp
@@ -83,12 +83,12 @@ void UndefinedAssignmentChecker::checkBind(SVal location, SVal val,
     break;
   }
 
-  BugReport *R = new BugReport(*BT, str, N);
+  auto R = llvm::make_unique<BugReport>(*BT, str, N);
   if (ex) {
     R->addRange(ex->getSourceRange());
     bugreporter::trackNullOrUndefValue(N, ex, *R);
   }
-  C.emitReport(R);
+  C.emitReport(std::move(R));
 }
 
 void ento::registerUndefinedAssignmentChecker(CheckerManager &mgr) {
diff --git a/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
index 4bfed85..a799b4c 100644
--- a/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
@@ -83,9 +83,9 @@ void UnixAPIChecker::ReportOpenBug(CheckerContext &C,
 
   LazyInitialize(BT_open, "Improper use of 'open'");
 
-  BugReport *Report = new BugReport(*BT_open, Msg, N);
+  auto Report = llvm::make_unique<BugReport>(*BT_open, Msg, N);
   Report->addRange(SR);
-  C.emitReport(Report);
+  C.emitReport(std::move(Report));
 }
 
 void UnixAPIChecker::CheckOpen(CheckerContext &C, const CallExpr *CE) const {
@@ -200,9 +200,9 @@ void UnixAPIChecker::CheckPthreadOnce(CheckerContext &C,
 
   LazyInitialize(BT_pthreadOnce, "Improper use of 'pthread_once'");
 
-  BugReport *report = new BugReport(*BT_pthreadOnce, os.str(), N);
+  auto report = llvm::make_unique<BugReport>(*BT_pthreadOnce, os.str(), N);
   report->addRange(CE->getArg(0)->getSourceRange());
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 }
 
 //===----------------------------------------------------------------------===//
@@ -241,11 +241,11 @@ bool UnixAPIChecker::ReportZeroByteAllocation(CheckerContext &C,
   SmallString<256> S;
   llvm::raw_svector_ostream os(S);    
   os << "Call to '" << fn_name << "' has an allocation size of 0 bytes";
-  BugReport *report = new BugReport(*BT_mallocZero, os.str(), N);
+  auto report = llvm::make_unique<BugReport>(*BT_mallocZero, os.str(), N);
 
   report->addRange(arg->getSourceRange());
   bugreporter::trackNullOrUndefValue(N, arg, *report);
-  C.emitReport(report);
+  C.emitReport(std::move(report));
 
   return true;
 }
diff --git a/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index cceffef..80384bb 100644
--- a/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -72,10 +72,10 @@ void VLASizeChecker::reportBug(VLASize_Kind Kind,
     break;
   }
 
-  BugReport *report = new BugReport(*BT, os.str(), N);
+  auto report = llvm::make_unique<BugReport>(*BT, os.str(), N);
   report->addRange(SizeE->getSourceRange());
   bugreporter::trackNullOrUndefValue(N, SizeE, *report);
-  C.emitReport(report);
+  C.emitReport(std::move(report));
   return;
 }
 
diff --git a/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp b/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp
index 7e1fc1e..f6ef4ae 100644
--- a/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp
@@ -125,9 +125,9 @@ public:
 //===----------------------------------------------------------------------===//
 
 void WalkAST::VisitChildren(Stmt *S) {
-  for (Stmt::child_iterator I = S->child_begin(), E = S->child_end(); I!=E; ++I)
-    if (Stmt *child = *I)
-      Visit(child);
+  for (Stmt *Child : S->children())
+    if (Child)
+      Visit(Child);
 }
 
 void WalkAST::VisitCallExpr(CallExpr *CE) {
diff --git a/lib/StaticAnalyzer/Core/BasicValueFactory.cpp b/lib/StaticAnalyzer/Core/BasicValueFactory.cpp
index 0e90566..3c3f41a 100644
--- a/lib/StaticAnalyzer/Core/BasicValueFactory.cpp
+++ b/lib/StaticAnalyzer/Core/BasicValueFactory.cpp
@@ -154,9 +154,13 @@ BasicValueFactory::evalAPSInt(BinaryOperator::Opcode Op,
       return &getValue( V1 * V2 );
 
     case BO_Div:
+      if (V2 == 0) // Avoid division by zero
+        return nullptr;
       return &getValue( V1 / V2 );
 
     case BO_Rem:
+      if (V2 == 0) // Avoid division by zero
+        return nullptr;
       return &getValue( V1 % V2 );
 
     case BO_Add:
diff --git a/lib/StaticAnalyzer/Core/BugReporter.cpp b/lib/StaticAnalyzer/Core/BugReporter.cpp
index 97e97ef..e4db64f 100644
--- a/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -1250,10 +1250,8 @@ static void reversePropagateIntererstingSymbols(BugReport &R,
       // Fall through.
     case Stmt::BinaryOperatorClass:
     case Stmt::UnaryOperatorClass: {
-      for (Stmt::const_child_iterator CI = Ex->child_begin(),
-            CE = Ex->child_end();
-            CI != CE; ++CI) {
-        if (const Expr *child = dyn_cast_or_null<Expr>(*CI)) {
+      for (const Stmt *SubStmt : Ex->children()) {
+        if (const Expr *child = dyn_cast_or_null<Expr>(SubStmt)) {
           IE.insert(child);
           SVal ChildV = State->getSVal(child, LCtx);
           R.markInteresting(ChildV);
@@ -3224,10 +3222,7 @@ void BugReporter::Register(BugType *BT) {
   BugTypes = F.add(BugTypes, BT);
 }
 
-void BugReporter::emitReport(BugReport* R) {
-  // To guarantee memory release.
-  std::unique_ptr<BugReport> UniqueR(R);
-
+void BugReporter::emitReport(std::unique_ptr<BugReport> R) {
   if (const ExplodedNode *E = R->getErrorNode()) {
     const AnalysisDeclContext *DeclCtx =
         E->getLocationContext()->getAnalysisDeclContext();
@@ -3258,11 +3253,11 @@ void BugReporter::emitReport(BugReport* R) {
   BugReportEquivClass* EQ = EQClasses.FindNodeOrInsertPos(ID, InsertPos);
 
   if (!EQ) {
-    EQ = new BugReportEquivClass(std::move(UniqueR));
+    EQ = new BugReportEquivClass(std::move(R));
     EQClasses.InsertNode(EQ, InsertPos);
     EQClassesVector.push_back(EQ);
   } else
-    EQ->AddReport(std::move(UniqueR));
+    EQ->AddReport(std::move(R));
 }
 
 
@@ -3462,12 +3457,12 @@ void BugReporter::EmitBasicReport(const Decl *DeclWithIssue,
 
   // 'BT' is owned by BugReporter.
   BugType *BT = getBugTypeForName(CheckName, name, category);
-  BugReport *R = new BugReport(*BT, str, Loc);
+  auto R = llvm::make_unique<BugReport>(*BT, str, Loc);
   R->setDeclWithIssue(DeclWithIssue);
   for (ArrayRef<SourceRange>::iterator I = Ranges.begin(), E = Ranges.end();
        I != E; ++I)
     R->addRange(*I);
-  emitReport(R);
+  emitReport(std::move(R));
 }
 
 BugType *BugReporter::getBugTypeForName(CheckName CheckName, StringRef name,
diff --git a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index fa7884f..f2915ed 100644
--- a/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -1130,9 +1130,8 @@ void FindLastStoreBRVisitor::registerStatementVarDecls(BugReport &BR,
       }
     }
 
-    for (Stmt::const_child_iterator I = Head->child_begin();
-        I != Head->child_end(); ++I)
-      WorkList.push_back(*I);
+    for (const Stmt *SubStmt : Head->children())
+      WorkList.push_back(SubStmt);
   }
 }
 
diff --git a/lib/StaticAnalyzer/Core/CheckerHelpers.cpp b/lib/StaticAnalyzer/Core/CheckerHelpers.cpp
index 28df695..3d9a815 100644
--- a/lib/StaticAnalyzer/Core/CheckerHelpers.cpp
+++ b/lib/StaticAnalyzer/Core/CheckerHelpers.cpp
@@ -22,11 +22,9 @@ bool clang::ento::containsMacro(const Stmt *S) {
   if (S->getLocEnd().isMacroID())
     return true;
 
-  for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end();
-      ++I)
-    if (const Stmt *child = *I)
-      if (containsMacro(child))
-        return true;
+  for (const Stmt *Child : S->children())
+    if (Child && containsMacro(Child))
+      return true;
 
   return false;
 }
@@ -38,11 +36,9 @@ bool clang::ento::containsEnum(const Stmt *S) {
   if (DR && isa<EnumConstantDecl>(DR->getDecl()))
     return true;
 
-  for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end();
-      ++I)
-    if (const Stmt *child = *I)
-      if (containsEnum(child))
-        return true;
+  for (const Stmt *Child : S->children())
+    if (Child && containsEnum(Child))
+      return true;
 
   return false;
 }
@@ -56,11 +52,9 @@ bool clang::ento::containsStaticLocal(const Stmt *S) {
       if (VD->isStaticLocal())
         return true;
 
-  for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end();
-      ++I)
-    if (const Stmt *child = *I)
-      if (containsStaticLocal(child))
-        return true;
+  for (const Stmt *Child : S->children())
+    if (Child && containsStaticLocal(Child))
+      return true;
 
   return false;
 }
@@ -70,11 +64,9 @@ bool clang::ento::containsBuiltinOffsetOf(const Stmt *S) {
   if (isa<OffsetOfExpr>(S))
     return true;
 
-  for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end();
-      ++I)
-    if (const Stmt *child = *I)
-      if (containsBuiltinOffsetOf(child))
-        return true;
+  for (const Stmt *Child : S->children())
+    if (Child && containsBuiltinOffsetOf(Child))
+      return true;
 
   return false;
 }
diff --git a/lib/StaticAnalyzer/Core/ExprEngine.cpp b/lib/StaticAnalyzer/Core/ExprEngine.cpp
index ef515fb..a3239f5 100644
--- a/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -822,6 +822,8 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::OMPAtomicDirectiveClass:
     case Stmt::OMPTargetDirectiveClass:
     case Stmt::OMPTeamsDirectiveClass:
+    case Stmt::OMPCancellationPointDirectiveClass:
+    case Stmt::OMPCancelDirectiveClass:
       llvm_unreachable("Stmt should not be in analyzer evaluation loop");
 
     case Stmt::ObjCSubscriptRefExprClass:
diff --git a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index fbeffb8..c957a65 100644
--- a/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -588,7 +588,10 @@ AnalysisConsumer::getModeForDecl(Decl *D, AnalysisMode Mode) {
   // - Header files: run non-path-sensitive checks only.
   // - System headers: don't run any checks.
   SourceManager &SM = Ctx->getSourceManager();
-  SourceLocation SL = SM.getExpansionLoc(D->getLocation());
+  SourceLocation SL = D->hasBody() ? D->getBody()->getLocStart()
+                                     : D->getLocation();
+  SL = SM.getExpansionLoc(SL);
+
   if (!Opts->AnalyzeAll && !SM.isWrittenInMainFile(SL)) {
     if (SL.isInvalid() || SM.isInSystemHeader(SL))
       return AM_None;
diff --git a/lib/Tooling/CompilationDatabase.cpp b/lib/Tooling/CompilationDatabase.cpp
index 4483b18..2272be6 100644
--- a/lib/Tooling/CompilationDatabase.cpp
+++ b/lib/Tooling/CompilationDatabase.cpp
@@ -250,14 +250,11 @@ static bool stripPositionalArgs(std::vector<const char *> Args,
 
   CompileJobAnalyzer CompileAnalyzer;
 
-  for (const auto &Job : Jobs) {
-    if (Job.getKind() == driver::Job::CommandClass) {
-      const driver::Command &Cmd = cast<driver::Command>(Job);
-      // Collect only for Assemble jobs. If we do all jobs we get duplicates
-      // since Link jobs point to Assemble jobs as inputs.
-      if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass)
-        CompileAnalyzer.run(&Cmd.getSource());
-    }
+  for (const auto &Cmd : Jobs) {
+    // Collect only for Assemble jobs. If we do all jobs we get duplicates
+    // since Link jobs point to Assemble jobs as inputs.
+    if (Cmd.getSource().getKind() == driver::Action::AssembleJobClass)
+      CompileAnalyzer.run(&Cmd.getSource());
   }
 
   if (CompileAnalyzer.Inputs.empty()) {
diff --git a/test/Analysis/division-by-zero.c b/test/Analysis/division-by-zero.c
new file mode 100644
index 0000000..d3c228e
--- /dev/null
+++ b/test/Analysis/division-by-zero.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=unix.Malloc %s
+// Do not crash due to division by zero
+
+int f(unsigned int a) {
+  if (a <= 0) return 1 / a;
+  return a;
+}
diff --git a/test/Analysis/retain-release.m b/test/Analysis/retain-release.m
index 1dbcda5..f0d91e3 100644
--- a/test/Analysis/retain-release.m
+++ b/test/Analysis/retain-release.m
@@ -2165,7 +2165,7 @@ void testCFReturnsNotRetained() {
 }
 
 void testCFReturnsNotRetainedAnnotated() {
-  extern void getViaParam2(CFTypeRef * __nonnull CF_RETURNS_NOT_RETAINED outObj);
+  extern void getViaParam2(CFTypeRef * _Nonnull CF_RETURNS_NOT_RETAINED outObj);
   CFTypeRef obj;
   getViaParam2(&obj);
   CFRelease(obj); // // expected-warning {{Incorrect decrement of the reference count of an object that is not owned at this point by the caller}}
diff --git a/test/Analysis/test-include-cpp.cpp b/test/Analysis/test-include-cpp.cpp
new file mode 100644
index 0000000..2ac5e11
--- /dev/null
+++ b/test/Analysis/test-include-cpp.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=core -verify %s
+
+#include "test-include-cpp.h"
+
+int TestIncludeClass::test1(int *p) {
+  p = 0;
+  return *p; // expected-warning{{Dereference of null pointer}}
+}
+
+int TestIncludeClass::test2(int *p) {
+  p = 0;
+  return *p; // expected-warning{{Dereference of null pointer}}
+}
diff --git a/test/Analysis/test-include-cpp.h b/test/Analysis/test-include-cpp.h
new file mode 100644
index 0000000..90ec27a
--- /dev/null
+++ b/test/Analysis/test-include-cpp.h
@@ -0,0 +1,9 @@
+#ifndef TEST_INCLUDE_CPP_H
+#define TEST_INCLUDE_CPP_H
+
+class TestIncludeClass {
+  int test1(int *);
+  static int test2(int *);
+};
+
+#endif
diff --git a/test/Analysis/test-include.c b/test/Analysis/test-include.c
new file mode 100644
index 0000000..6aa80b9
--- /dev/null
+++ b/test/Analysis/test-include.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=core -verify %s
+
+#include "test-include.h"
+#define DIVYX(X,Y) Y/X
+
+void test_01(int *data) {
+  data = 0;
+  *data = 1; // expected-warning{{Dereference of null pointer}}
+}
+
+int test_02() {
+  int res = DIVXY(1,0); // expected-warning{{Division by zero}}
+                        // expected-warning@-1{{division by zero is undefined}}
+  return res;
+}
+
+int test_03() {
+  int res = DIVYX(0,1); // expected-warning{{Division by zero}}
+                        // expected-warning@-1{{division by zero is undefined}}
+  return res;
+}
\ No newline at end of file
diff --git a/test/Analysis/test-include.h b/test/Analysis/test-include.h
new file mode 100644
index 0000000..07cd1c9
--- /dev/null
+++ b/test/Analysis/test-include.h
@@ -0,0 +1,2 @@
+void test_01(int * data);
+#define DIVXY(X,Y) X/Y
diff --git a/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.spec.auto/p7-1y.cpp b/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.spec.auto/p7-1y.cpp
index f7b3e8e..c3dc1de 100644
--- a/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.spec.auto/p7-1y.cpp
+++ b/test/CXX/dcl.dcl/dcl.spec/dcl.type/dcl.spec.auto/p7-1y.cpp
@@ -11,6 +11,9 @@ namespace std {
 int i;
 int &&f();
 
+template <typename T>
+void overloaded_fn(T); // expected-note {{possible target}}
+
 using Int = int;
 using IntLRef = int&;
 using IntRRef = int&&;
@@ -57,6 +60,7 @@ decltype(auto) ((((((v1)))))) = 0; // ok
 decltype(auto) v2[1] = { 0 }; // expected-error {{cannot form array of 'decltype(auto)'}}
 decltype(auto) &v3 = { 0 }; // expected-error {{cannot form reference to 'decltype(auto)'}}
 decltype(auto) *v4 = { 0 }; // expected-error {{cannot form pointer to 'decltype(auto)'}}
+decltype(auto) v5 = &overloaded_fn; // expected-error {{could not be resolved}}
 
 auto multi1a = 0, &multi1b = multi1a;
 auto multi1c = multi1a, multi1d = multi1b;
diff --git a/test/CXX/drs/dr9xx.cpp b/test/CXX/drs/dr9xx.cpp
index 4bcd656..b37e17d 100644
--- a/test/CXX/drs/dr9xx.cpp
+++ b/test/CXX/drs/dr9xx.cpp
@@ -44,3 +44,32 @@ namespace dr990 { // dr990: 3.5
   D d{};
 #endif
 }
+
+namespace dr948 { // dr948: 3.7
+#if __cplusplus >= 201103L
+  class A {
+  public:
+     constexpr A(int v) : v(v) { }
+     constexpr operator int() const { return v; }
+  private:
+     int v;
+  };
+
+  constexpr int id(int x)
+  {
+    return x;
+  }
+
+  void f() {
+     if (constexpr int i = id(101)) { }
+     switch (constexpr int i = id(2)) { default: break; case 2: break; }
+     for (; constexpr int i = id(0); ) { }
+     while (constexpr int i = id(0)) { }
+
+     if (constexpr A i = 101) { }
+     switch (constexpr A i = 2) { default: break; case 2: break; }
+     for (; constexpr A i = 0; ) { }
+     while (constexpr A i = 0) { }
+  }
+#endif
+}
diff --git a/test/CodeCompletion/macros-in-modules.c b/test/CodeCompletion/macros-in-modules.c
new file mode 100644
index 0000000..82ffaae
--- /dev/null
+++ b/test/CodeCompletion/macros-in-modules.c
@@ -0,0 +1,11 @@
+// RUN: rm -rf %t && mkdir %t
+// RUN: echo 'module Foo { header "foo.h" }' > %t/module.modulemap
+// RUN: echo '#define FOO_MACRO 42' > %t/foo.h
+// RUN: c-index-test -code-completion-at=%s:9:1 -I %t %s | FileCheck %s
+// RUN: c-index-test -code-completion-at=%s:9:1 -I %t -fmodules %s | FileCheck %s
+
+#include "foo.h"
+int x =
+/*here*/1;
+
+// CHECK: FOO_MACRO
diff --git a/test/CodeCompletion/macros-in-modules.m b/test/CodeCompletion/macros-in-modules.m
new file mode 100644
index 0000000..8d6b375
--- /dev/null
+++ b/test/CodeCompletion/macros-in-modules.m
@@ -0,0 +1,10 @@
+// RUN: rm -rf %t && mkdir %t
+// RUN: echo 'module Foo { header "foo.h" }' > %t/module.modulemap
+// RUN: echo '#define FOO_MACRO 42' > %t/foo.h
+// RUN: c-index-test -code-completion-at=%s:8:1 -I %t -fmodules %s | FileCheck %s
+
+@import Foo;
+int x =
+/*here*/1;
+
+// CHECK: FOO_MACRO
diff --git a/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c b/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
index 9ceee4c..f6ce552 100644
--- a/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
+++ b/test/CodeGen/2004-11-27-StaticFunctionRedeclare.c
@@ -6,7 +6,7 @@
 
 // This is PR244
 
-// CHECK-LABEL: define void @bar(
+// CHECK-LABEL: define {{.*}}void @bar(
 // CHECK: call {{.*}} @func
 // CHECK: define internal {{.*}}i32 @func(
 static int func();
diff --git a/test/CodeGen/2007-04-14-FNoBuiltin.c b/test/CodeGen/2007-04-14-FNoBuiltin.c
index 25ae01c..4d194b1 100644
--- a/test/CodeGen/2007-04-14-FNoBuiltin.c
+++ b/test/CodeGen/2007-04-14-FNoBuiltin.c
@@ -3,7 +3,7 @@
 
 extern int printf(const char*, ...);
 
-// CHECK: define void {{.*}}foo(
+// CHECK: define {{.*}}void {{.*}}foo(
 void foo(const char *msg) {
   // CHECK: call {{.*}}printf
   printf("%s\n",msg);
diff --git a/test/CodeGen/PR8880.c b/test/CodeGen/PR8880.c
index e03d2a4..ff8491e 100644
--- a/test/CodeGen/PR8880.c
+++ b/test/CodeGen/PR8880.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -Wno-gcc-compat -emit-llvm -o - %s | FileCheck %s
 
 void pr8880_cg_1(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_1(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_1(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2; i != 10 ; i++ )
@@ -31,7 +31,7 @@ void pr8880_cg_1(int *iptr) {
 }
 
 void pr8880_cg_2(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_2(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_2(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2; i != 10 ; i++ )
@@ -61,7 +61,7 @@ void pr8880_cg_2(int *iptr) {
 }
 
 void pr8880_cg_3(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_3(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_3(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2 ; i != 10 ; i++ )
@@ -92,7 +92,7 @@ void pr8880_cg_3(int *iptr) {
 }
 
 void pr8880_cg_4(int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_4(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_4(
   int i, j;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
   for (i = 2 ; i != 10 ; i++ )
@@ -123,7 +123,7 @@ void pr8880_cg_4(int *iptr) {
 }
 
 void pr8880_cg_5(int x, int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_5(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_5(
   int y = 5;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
 // CHECK: [[OUTER_COND]]
@@ -148,7 +148,7 @@ void pr8880_cg_5(int x, int *iptr) {
 }
 
 void pr8880_cg_6(int x, int *iptr) {
-// CHECK-LABEL: define void @pr8880_cg_6(
+// CHECK-LABEL: define {{.*}}void @pr8880_cg_6(
   int y = 5;
 // CHECK: br label %[[OUTER_COND:[0-9A-Za-z$._]+]]
 // CHECK: [[OUTER_COND]]
diff --git a/test/CodeGen/arm-microsoft-intrinsics.c b/test/CodeGen/arm-microsoft-intrinsics.c
index 5f19e5e..e073cc2 100644
--- a/test/CodeGen/arm-microsoft-intrinsics.c
+++ b/test/CodeGen/arm-microsoft-intrinsics.c
@@ -47,17 +47,17 @@ unsigned int check_MoveFromCoprocessor2(void) {
 // CHECK-MSVC: @llvm.arm.mrc2(i32 0, i32 0, i32 0, i32 0, i32 0)
 // CHECK-EABI: error: implicit declaration of function '_MoveFromCoprocessor2'
 
-void check_MoveToCoprocessor(void) {
-  _MoveToCoprocessor(0, 0, 0, 0, 0, 0);
+void check_MoveToCoprocessor(unsigned int value) {
+  _MoveToCoprocessor(value, 10, 7, 1, 0, 0);
 }
 
-// CHECK-MSVC: @llvm.arm.mcr(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+// CHECK-MSVC: @llvm.arm.mcr(i32 10, i32 7, i32 %{{[^,]*}}, i32 1, i32 0, i32 0)
 // CHECK-EABI: error: implicit declaration of function '_MoveToCoprocessor'
 
-void check_MoveToCoprocessor2(void) {
-  _MoveToCoprocessor2(0, 0, 0, 0, 0, 0);
+void check_MoveToCoprocessor2(unsigned int value) {
+  _MoveToCoprocessor2(value, 10, 7, 1, 0, 0);
 }
 
-// CHECK-MSVC: @llvm.arm.mcr2(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+// CHECK-MSVC: @llvm.arm.mcr2(i32 10, i32 7, i32 %{{[^,]*}}, i32 1, i32 0, i32 0)
 // CHECK-EABI: error: implicit declaration of function '_MoveToCoprocessor2'
 
diff --git a/test/CodeGen/arm-target-features.c b/test/CodeGen/arm-target-features.c
index ece8bdf..36804b4 100644
--- a/test/CodeGen/arm-target-features.c
+++ b/test/CodeGen/arm-target-features.c
@@ -6,7 +6,7 @@
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4
-// CHECK-VFP4: "target-features"="+vfp4,+neon"
+// CHECK-VFP4: "target-features"="+neon,+vfp4"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
@@ -15,14 +15,14 @@
 // RUN: %clang_cc1 -triple armv7-linux-gnueabihf -target-cpu cortex-a17 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
 // RUN: %clang_cc1 -triple thumbv7s-linux-gnueabi -target-cpu swift -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu krait -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
-// CHECK-VFP4-DIV: "target-features"="+vfp4,+neon,+hwdiv,+hwdiv-arm"
+// CHECK-VFP4-DIV: "target-features"="+hwdiv,+hwdiv-arm,+neon,+vfp4"
 
 
 // RUN: %clang_cc1 -triple thumbv7s-apple-ios7.0 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a72 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
-// CHECK-BASIC-V8: "target-features"="+neon,+fp-armv8,+hwdiv,+crypto,+crc,+hwdiv-arm"
+// CHECK-BASIC-V8: "target-features"="+crc,+crypto,+fp-armv8,+hwdiv,+hwdiv-arm,+neon"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-DIV
diff --git a/test/CodeGen/attr-nodebug.c b/test/CodeGen/attr-nodebug.c
index 66caa2b..07a4aa3 100644
--- a/test/CodeGen/attr-nodebug.c
+++ b/test/CodeGen/attr-nodebug.c
@@ -1,12 +1,32 @@
-// RUN: %clang_cc1 -g -emit-llvm -o %t %s
-// RUN: not grep 'call void @llvm.dbg.func.start' %t
+// RUN: %clang_cc1 -g -emit-llvm -o - %s | FileCheck %s
 
 void t1() __attribute__((nodebug));
 
 void t1()
 {
   int a = 10;
-  
   a++;
 }
 
+void t2()
+{
+  int b = 10;
+  b++;
+}
+
+// With nodebug, IR should have no llvm.dbg.* calls, or !dbg annotations.
+// CHECK-LABEL: @t1
+// CHECK-NOT:   dbg
+// CHECK:       }
+
+// For sanity, check those things do occur normally.
+// CHECK-LABEL: @t2
+// CHECK:       call{{.*}}llvm.dbg
+// CHECK:       !dbg
+// CHECK:       }
+
+// We should see a function description for t2 but not t1.
+// CHECK-NOT: DISubprogram(name: "t1"
+// CHECK:     DISubprogram(name: "t2"
+// CHECK-NOT: DISubprogram(name: "t1"
+
diff --git a/test/CodeGen/attr-target.c b/test/CodeGen/attr-target.c
index 8c0f335..7ea5fe5 100644
--- a/test/CodeGen/attr-target.c
+++ b/test/CodeGen/attr-target.c
@@ -9,6 +9,8 @@ int __attribute__((target("fpmath=387"))) koala(int a) { return 4; }
 
 int __attribute__((target("mno-sse2"))) echidna(int a) { return 4; }
 
+int __attribute__((target("sse4"))) panda(int a) { return 4; }
+
 int bar(int a) { return baz(a) + foo(a); }
 
 // Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
@@ -21,5 +23,6 @@ int bar(int a) { return baz(a) + foo(a); }
 // CHECK: echidna{{.*}} #2
 // CHECK: bar{{.*}} #0
 // CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+sse,+sse2"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+sse,+sse2,+avx,+sse4.2"
-// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+sse,+sse2,-sse2"
+// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3"
+// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+sse,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512pf,-avx512vl,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop"
+// CHECK: #3 = {{.*}}"target-cpu"="x86-64" "target-features"="+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3"
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index 452d737..6cc02ef 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -427,3 +427,409 @@ __m512i test_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
   //CHECK: @llvm.x86.avx512.mask.pmull.w.512
   return _mm512_maskz_mullo_epi16(__U, __A, __B);
 }
+
+__m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
+  // CHECK-LABEL: @test_mm512_mask_blend_epi8
+  // CHECK: @llvm.x86.avx512.mask.blend.b.512
+  return _mm512_mask_blend_epi8(__U,__A,__W); 
+}
+__m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
+  // CHECK-LABEL: @test_mm512_mask_blend_epi16
+  // CHECK: @llvm.x86.avx512.mask.blend.w.512
+  return _mm512_mask_blend_epi16(__U,__A,__W); 
+}
+__m512i test_mm512_abs_epi8(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.512
+  return _mm512_abs_epi8(__A); 
+}
+__m512i test_mm512_mask_abs_epi8(__m512i __W, __mmask64 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.512
+  return _mm512_mask_abs_epi8(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_abs_epi8(__mmask64 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_abs_epi8
+  // CHECK: @llvm.x86.avx512.mask.pabs.b.512
+  return _mm512_maskz_abs_epi8(__U,__A); 
+}
+__m512i test_mm512_abs_epi16(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.512
+  return _mm512_abs_epi16(__A); 
+}
+__m512i test_mm512_mask_abs_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.512
+  return _mm512_mask_abs_epi16(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_abs_epi16(__mmask32 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_abs_epi16
+  // CHECK: @llvm.x86.avx512.mask.pabs.w.512
+  return _mm512_maskz_abs_epi16(__U,__A); 
+}
+__m512i test_mm512_packs_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.512
+  return _mm512_packs_epi32(__A,__B); 
+}
+__m512i test_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.512
+  return _mm512_maskz_packs_epi32(__M,__A,__B); 
+}
+__m512i test_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packs_epi32
+  // CHECK: @llvm.x86.avx512.mask.packssdw.512
+  return _mm512_mask_packs_epi32(__W,__M,__A,__B); 
+}
+__m512i test_mm512_packs_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.512
+  return _mm512_packs_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.512
+  return _mm512_mask_packs_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packs_epi16
+  // CHECK: @llvm.x86.avx512.mask.packsswb.512
+  return _mm512_maskz_packs_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_packus_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.512
+  return _mm512_packus_epi32(__A,__B); 
+}
+__m512i test_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.512
+  return _mm512_maskz_packus_epi32(__M,__A,__B); 
+}
+__m512i test_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packus_epi32
+  // CHECK: @llvm.x86.avx512.mask.packusdw.512
+  return _mm512_mask_packus_epi32(__W,__M,__A,__B); 
+}
+__m512i test_mm512_packus_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.512
+  return _mm512_packus_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.512
+  return _mm512_mask_packus_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_packus_epi16
+  // CHECK: @llvm.x86.avx512.mask.packuswb.512
+  return _mm512_maskz_packus_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
+  return _mm512_adds_epi8(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
+  return _mm512_mask_adds_epi8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epi8
+  // CHECK: @llvm.x86.avx512.mask.padds.b.512
+  return _mm512_maskz_adds_epi8(__U,__A,__B); 
+}
+__m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_adds_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_mask_adds_epi16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epi16
+  // CHECK: @llvm.x86.avx512.mask.padds.w.512
+  return _mm512_maskz_adds_epi16(__U,__A,__B); 
+}
+__m512i test_mm512_adds_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
+  return _mm512_adds_epu8(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
+  return _mm512_mask_adds_epu8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epu8
+  // CHECK: @llvm.x86.avx512.mask.paddus.b.512
+  return _mm512_maskz_adds_epu8(__U,__A,__B); 
+}
+__m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
+  return _mm512_adds_epu16(__A,__B); 
+}
+__m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
+  return _mm512_mask_adds_epu16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_adds_epu16
+  // CHECK: @llvm.x86.avx512.mask.paddus.w.512
+  return _mm512_maskz_adds_epu16(__U,__A,__B); 
+}
+__m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.512
+  return _mm512_avg_epu8(__A,__B); 
+}
+__m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.512
+  return _mm512_mask_avg_epu8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_avg_epu8
+  // CHECK: @llvm.x86.avx512.mask.pavg.b.512
+  return _mm512_maskz_avg_epu8(__U,__A,__B); 
+}
+__m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.512
+  return _mm512_avg_epu16(__A,__B); 
+}
+__m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.512
+  return _mm512_mask_avg_epu16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_avg_epu16
+  // CHECK: @llvm.x86.avx512.mask.pavg.w.512
+  return _mm512_maskz_avg_epu16(__U,__A,__B); 
+}
+__m512i test_mm512_max_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512
+  return _mm512_max_epi8(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512
+  return _mm512_maskz_max_epi8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.b.512
+  return _mm512_mask_max_epi8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_max_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512
+  return _mm512_max_epi16(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512
+  return _mm512_maskz_max_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmaxs.w.512
+  return _mm512_mask_max_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_max_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512
+  return _mm512_max_epu8(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512
+  return _mm512_maskz_max_epu8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epu8
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.b.512
+  return _mm512_mask_max_epu8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_max_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512
+  return _mm512_max_epu16(__A,__B); 
+}
+__m512i test_mm512_maskz_max_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512
+  return _mm512_maskz_max_epu16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_max_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_max_epu16
+  // CHECK: @llvm.x86.avx512.mask.pmaxu.w.512
+  return _mm512_mask_max_epu16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.512
+  return _mm512_min_epi8(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.512
+  return _mm512_maskz_min_epi8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epi8
+  // CHECK: @llvm.x86.avx512.mask.pmins.b.512
+  return _mm512_mask_min_epi8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.512
+  return _mm512_min_epi16(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epi16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.512
+  return _mm512_maskz_min_epi16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmins.w.512
+  return _mm512_mask_min_epi16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.512
+  return _mm512_min_epu8(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epu8(__mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.512
+  return _mm512_maskz_min_epu8(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epu8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epu8
+  // CHECK: @llvm.x86.avx512.mask.pminu.b.512
+  return _mm512_mask_min_epu8(__W,__M,__A,__B); 
+}
+__m512i test_mm512_min_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.512
+  return _mm512_min_epu16(__A,__B); 
+}
+__m512i test_mm512_maskz_min_epu16(__mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.512
+  return _mm512_maskz_min_epu16(__M,__A,__B); 
+}
+__m512i test_mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_min_epu16
+  // CHECK: @llvm.x86.avx512.mask.pminu.w.512
+  return _mm512_mask_min_epu16(__W,__M,__A,__B); 
+}
+__m512i test_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.512
+  return _mm512_shuffle_epi8(__A,__B); 
+}
+__m512i test_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.512
+  return _mm512_mask_shuffle_epi8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_shuffle_epi8
+  // CHECK: @llvm.x86.avx512.mask.pshuf.b.512
+  return _mm512_maskz_shuffle_epi8(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_subs_epi8(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_mask_subs_epi8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epi8
+  // CHECK: @llvm.x86.avx512.mask.psubs.b.512
+  return _mm512_maskz_subs_epi8(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_subs_epi16(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_mask_subs_epi16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epi16
+  // CHECK: @llvm.x86.avx512.mask.psubs.w.512
+  return _mm512_maskz_subs_epi16(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_subs_epu8(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_mask_subs_epu8(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epu8
+  // CHECK: @llvm.x86.avx512.mask.psubus.b.512
+  return _mm512_maskz_subs_epu8(__U,__A,__B); 
+}
+__m512i test_mm512_subs_epu16(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_subs_epu16(__A,__B); 
+}
+__m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_mask_subs_epu16(__W,__U,__A,__B); 
+}
+__m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_subs_epu16
+  // CHECK: @llvm.x86.avx512.mask.psubus.w.512
+  return _mm512_maskz_subs_epu16(__U,__A,__B); 
+}
+__m512i test_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask2_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermi2var.hi.512
+  return _mm512_mask2_permutex2var_epi16(__A,__I,__U,__B); 
+}
+__m512i test_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.512
+  return _mm512_permutex2var_epi16(__A,__I,__B); 
+}
+__m512i test_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.512
+  return _mm512_mask_permutex2var_epi16(__A,__U,__I,__B); 
+}
+__m512i test_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi16
+  // CHECK: @llvm.x86.avx512.mask.vpermt2var.hi.512
+  return _mm512_maskz_permutex2var_epi16(__U,__A,__I,__B); 
+}
diff --git a/test/CodeGen/avx512cdintrin.c b/test/CodeGen/avx512cdintrin.c
new file mode 100644
index 0000000..1b4860a
--- /dev/null
+++ b/test/CodeGen/avx512cdintrin.c
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512cd -emit-llvm -o - -Werror | FileCheck %s
+#include <immintrin.h>
+__m512i test_mm512_conflict_epi64(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_conflict_epi64
+  // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+  return _mm512_conflict_epi64(__A); 
+}
+__m512i test_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_conflict_epi64
+  // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+  return _mm512_mask_conflict_epi64(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_conflict_epi64
+  // CHECK: @llvm.x86.avx512.mask.conflict.q.512
+  return _mm512_maskz_conflict_epi64(__U,__A); 
+}
+__m512i test_mm512_conflict_epi32(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_conflict_epi32
+  // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+  return _mm512_conflict_epi32(__A); 
+}
+__m512i test_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_conflict_epi32
+  // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+  return _mm512_mask_conflict_epi32(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_conflict_epi32
+  // CHECK: @llvm.x86.avx512.mask.conflict.d.512
+  return _mm512_maskz_conflict_epi32(__U,__A); 
+}
+__m512i test_mm512_lzcnt_epi32(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_lzcnt_epi32
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.d.512
+  return _mm512_lzcnt_epi32(__A); 
+}
+__m512i test_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_lzcnt_epi32
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.d.512
+  return _mm512_mask_lzcnt_epi32(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_lzcnt_epi32
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.d.512
+  return _mm512_maskz_lzcnt_epi32(__U,__A); 
+}
+__m512i test_mm512_lzcnt_epi64(__m512i __A) {
+  // CHECK-LABEL: @test_mm512_lzcnt_epi64
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.q.512
+  return _mm512_lzcnt_epi64(__A); 
+}
+__m512i test_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_mask_lzcnt_epi64
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.q.512
+  return _mm512_mask_lzcnt_epi64(__W,__U,__A); 
+}
+__m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
+  // CHECK-LABEL: @test_mm512_maskz_lzcnt_epi64
+  // CHECK: @llvm.x86.avx512.mask.lzcnt.q.512
+  return _mm512_maskz_lzcnt_epi64(__U,__A); 
+}
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index a49a198..112dfd8 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -201,11 +201,486 @@ __m512d test_mm512_broadcastsd_pd(__m128d a)
   return _mm512_broadcastsd_pd(a);
 }
 
-__m512i test_mm512_fmadd_pd(__m512d a, __m512d b, __m512d c)
-{
+__m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+
+__m512d test_mm512_mask_fmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fnmadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fnmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fnmadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fnmsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
   // CHECK-LABEL: @test_mm512_fmadd_pd
-  // CHECK: @llvm.x86.fma.mask.vfmadd.pd.512
-  return _mm512_fmadd_pd(a, b, c);
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmadd_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fmsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_mask_fmsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.512
+  return _mm512_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512
+  return _mm512_fnmsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.512
+  return _mm512_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fnmadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fnmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fnmadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fnmsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmadd_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fmsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_mask_fmsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.512
+  return _mm512_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.512
+  return _mm512_fnmsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.512
+  return _mm512_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmaddsub_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmaddsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmaddsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmaddsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.512
+  return _mm512_mask3_fmaddsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmaddsub_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmsubadd_round_pd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fmsubadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmsubadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmaddsub_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.512
+  return _mm512_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+__m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+__m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_fmsubadd_pd(__A, __B, __C);
+}
+__m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.512
+  return _mm512_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.512
+  return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmaddsub_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmaddsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmaddsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmaddsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.512
+  return _mm512_mask3_fmaddsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmaddsub_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmsubadd_round_ps(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fmsubadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmsubadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmaddsub_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.512
+  return _mm512_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+__m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+__m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_fmsubadd_ps(__A, __B, __C);
+}
+__m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.512
+  return _mm512_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.512
+  return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+__m512d test_mm512_mask3_fmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.512
+  return _mm512_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.512
+  return _mm512_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask3_fmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.512
+  return _mm512_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.512
+  return _mm512_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+__m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.512
+  return _mm512_mask3_fmsubadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.512
+  return _mm512_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.512
+  return _mm512_mask3_fmsubadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.512
+  return _mm512_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+__m512d test_mm512_mask_fnmadd_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.512
+  return _mm512_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.512
+  return _mm512_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask_fnmadd_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.512
+  return _mm512_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.512
+  return _mm512_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask_fnmsub_round_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.512
+  return _mm512_mask_fnmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask3_fnmsub_round_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.512
+  return _mm512_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512d test_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.512
+  return _mm512_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+__m512d test_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.512
+  return _mm512_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+__m512 test_mm512_mask_fnmsub_round_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.512
+  return _mm512_mask_fnmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask3_fnmsub_round_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_round_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.512
+  return _mm512_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_NEAREST_INT);
+}
+__m512 test_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
+  // CHECK-LABEL: @test_mm512_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.512
+  return _mm512_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+__m512 test_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
+  // CHECK-LABEL: @test_mm512_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.512
+  return _mm512_mask3_fnmsub_ps(__A, __B, __C, __U);
 }
 
 __mmask16 test_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c
index 9446d46..00b0d5d 100644
--- a/test/CodeGen/avx512vl-builtins.c
+++ b/test/CodeGen/avx512vl-builtins.c
@@ -1121,3 +1121,439 @@ __mmask8 test_mm128_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
   // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
   return _mm128_mask_cmp_pd_mask(m, __A, __B, 0);
 }
+
+
+//igorb
+
+__m128d test_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.128
+  return _mm_mask_fmadd_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.128
+  return _mm_mask_fmsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.128
+  return _mm_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.128
+  return _mm_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.128
+  return _mm_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.256
+  return _mm256_mask_fmadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.256
+  return _mm256_mask_fmsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.256
+  return _mm256_mask3_fmadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.pd.256
+  return _mm256_mask3_fnmadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fmadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fmsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fnmadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.pd.256
+  return _mm256_maskz_fnmsub_pd(__U, __A, __B, __C);
+}
+
+__m128 test_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.128
+  return _mm_mask_fmadd_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.128
+  return _mm_mask_fmsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.128
+  return _mm_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.128
+  return _mm_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.128
+  return _mm_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.256
+  return _mm256_mask_fmadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ps.256
+  return _mm256_mask_fmsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.256
+  return _mm256_mask3_fmadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmadd.ps.256
+  return _mm256_mask3_fnmadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fmadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fmsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fnmadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmadd.ps.256
+  return _mm256_maskz_fnmsub_ps(__U, __A, __B, __C);
+}
+
+__m128d test_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.128
+  return _mm_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.128
+  return _mm_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.128
+  return _mm_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+
+__m128d test_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.128
+  return _mm_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+
+__m128d test_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.128
+  return _mm_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.256
+  return _mm256_mask_fmaddsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.pd.256
+  return _mm256_mask_fmsubadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.pd.256
+  return _mm256_mask3_fmaddsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmaddsub_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.256
+  return _mm256_maskz_fmaddsub_pd(__U, __A, __B, __C);
+}
+
+__m256d test_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.pd.256
+  return _mm256_maskz_fmsubadd_pd(__U, __A, __B, __C);
+}
+
+__m128 test_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.128
+  return _mm_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.128
+  return _mm_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.128
+  return _mm_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+
+__m128 test_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.128
+  return _mm_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+
+__m128 test_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.128
+  return _mm_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.256
+  return _mm256_mask_fmaddsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfmaddsub.ps.256
+  return _mm256_mask_fmsubadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmaddsub.ps.256
+  return _mm256_mask3_fmaddsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmaddsub_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.256
+  return _mm256_maskz_fmaddsub_ps(__U, __A, __B, __C);
+}
+
+__m256 test_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_maskz_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.maskz.vfmaddsub.ps.256
+  return _mm256_maskz_fmsubadd_ps(__U, __A, __B, __C);
+}
+
+__m128d test_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.128
+  return _mm_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.pd.256
+  return _mm256_mask3_fmsub_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.128
+  return _mm_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsub.ps.256
+  return _mm256_mask3_fmsub_ps(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.128
+  return _mm_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsubadd_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.pd.256
+  return _mm256_mask3_fmsubadd_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.128
+  return _mm_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fmsubadd_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfmsubadd.ps.256
+  return _mm256_mask3_fmsubadd_ps(__A, __B, __C, __U);
+}
+
+__m128d test_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.128
+  return _mm_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmadd_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.pd.256
+  return _mm256_mask_fnmadd_pd(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.128
+  return _mm_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmadd_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmadd.ps.256
+  return _mm256_mask_fnmadd_ps(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.128
+  return _mm_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+
+__m128d test_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.128
+  return _mm_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+
+__m256d test_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.pd.256
+  return _mm256_mask_fnmsub_pd(__A, __U, __B, __C);
+}
+
+__m256d test_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmsub_pd
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.pd.256
+  return _mm256_mask3_fnmsub_pd(__A, __B, __C, __U);
+}
+
+__m128 test_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) {
+  // CHECK-LABEL: @test_mm_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.128
+  return _mm_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+
+__m128 test_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.128
+  return _mm_mask3_fnmsub_ps(__A, __B, __C, __U);
+}
+
+__m256 test_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) {
+  // CHECK-LABEL: @test_mm256_mask_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask.vfnmsub.ps.256
+  return _mm256_mask_fnmsub_ps(__A, __U, __B, __C);
+}
+
+__m256 test_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) {
+  // CHECK-LABEL: @test_mm256_mask3_fnmsub_ps
+  // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ps.256 
+  return _mm256_mask3_fnmsub_ps(__A, __B, __C, __U);
+}
+
diff --git a/test/CodeGen/builtin-cpu-supports.c b/test/CodeGen/builtin-cpu-supports.c
new file mode 100644
index 0000000..2252b3e
--- /dev/null
+++ b/test/CodeGen/builtin-cpu-supports.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm < %s| FileCheck %s
+
+// Test that we have the structure definition, the gep offsets, the name of the
+// global, the bit grab, and the icmp correct.
+extern void a(const char *);
+
+int main() {
+  if (__builtin_cpu_supports("sse4.2"))
+    a("sse4.2");
+
+  // CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 3, i32 0)
+  // CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256
+  // CHECK = icmp ne i32 [[AND]], 0
+
+  return 0;
+}
diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c
index 5f91f7ad..ebf2067 100644
--- a/test/CodeGen/builtins-nvptx.c
+++ b/test/CodeGen/builtins-nvptx.c
@@ -1,8 +1,13 @@
 // REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
 
-int read_tid() {
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+__device__ int read_tid() {
 
 // CHECK: call i32 @llvm.ptx.read.tid.x()
 // CHECK: call i32 @llvm.ptx.read.tid.y()
@@ -18,7 +23,7 @@ int read_tid() {
 
 }
 
-int read_ntid() {
+__device__ int read_ntid() {
 
 // CHECK: call i32 @llvm.ptx.read.ntid.x()
 // CHECK: call i32 @llvm.ptx.read.ntid.y()
@@ -34,7 +39,7 @@ int read_ntid() {
 
 }
 
-int read_ctaid() {
+__device__ int read_ctaid() {
 
 // CHECK: call i32 @llvm.ptx.read.ctaid.x()
 // CHECK: call i32 @llvm.ptx.read.ctaid.y()
@@ -50,7 +55,7 @@ int read_ctaid() {
 
 }
 
-int read_nctaid() {
+__device__ int read_nctaid() {
 
 // CHECK: call i32 @llvm.ptx.read.nctaid.x()
 // CHECK: call i32 @llvm.ptx.read.nctaid.y()
@@ -66,7 +71,7 @@ int read_nctaid() {
 
 }
 
-int read_ids() {
+__device__ int read_ids() {
 
 // CHECK: call i32 @llvm.ptx.read.laneid()
 // CHECK: call i32 @llvm.ptx.read.warpid()
@@ -86,7 +91,7 @@ int read_ids() {
 
 }
 
-int read_lanemasks() {
+__device__ int read_lanemasks() {
 
 // CHECK: call i32 @llvm.ptx.read.lanemask.eq()
 // CHECK: call i32 @llvm.ptx.read.lanemask.le()
@@ -104,8 +109,7 @@ int read_lanemasks() {
 
 }
 
-
-long read_clocks() {
+__device__ long read_clocks() {
 
 // CHECK: call i32 @llvm.ptx.read.clock()
 // CHECK: call i64 @llvm.ptx.read.clock64()
@@ -117,7 +121,7 @@ long read_clocks() {
 
 }
 
-int read_pms() {
+__device__ int read_pms() {
 
 // CHECK: call i32 @llvm.ptx.read.pm0()
 // CHECK: call i32 @llvm.ptx.read.pm1()
@@ -133,7 +137,7 @@ int read_pms() {
 
 }
 
-void sync() {
+__device__ void sync() {
 
 // CHECK: call void @llvm.ptx.bar.sync(i32 0)
 
@@ -146,7 +150,7 @@ void sync() {
 
 // The idea is not to test all intrinsics, just that Clang is recognizing the
 // builtins defined in BuiltinsNVPTX.def
-void nvvm_math(float f1, float f2, double d1, double d2) {
+__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
 // CHECK: call float @llvm.nvvm.fmax.f
   float t1 = __nvvm_fmax_f(f1, f2);
 // CHECK: call float @llvm.nvvm.fmin.f
@@ -176,3 +180,95 @@ void nvvm_math(float f1, float f2, double d1, double d2) {
 // CHECK: call void @llvm.nvvm.barrier0()
   __nvvm_bar0();
 }
+
+__device__ int di;
+__shared__ int si;
+__device__ long dl;
+__shared__ long sl;
+__device__ long long dll;
+__shared__ long long sll;
+
+// Check for atomic intrinsics
+// CHECK-LABEL: nvvm_atom
+__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
+                          long long *llp, long long ll) {
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_i(ip, i);
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_l(&dl, l);
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_i(ip, i);
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_l(&dl, l);
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_i(ip, i);
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_l(&dl, l);
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_i(ip, i);
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_l(&dl, l);
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_i(ip, i);
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_l(&dl, l);
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_i(ip, i);
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_l(&dl, l);
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_i(ip, i);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_l(&dl, l);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ll(&sll, ll);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);
+
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_i(ip, i);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_l(&dl, l);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ll(&sll, ll);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);
+
+  // CHECK: cmpxchg
+  __nvvm_atom_cas_gen_i(ip, 0, i);
+  // CHECK: cmpxchg
+  __nvvm_atom_cas_gen_l(&dl, 0, l);
+  // CHECK: cmpxchg
+  __nvvm_atom_cas_gen_ll(&sll, 0, ll);
+
+  // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
+  __nvvm_atom_add_gen_f(fp, f);
+
+  // CHECK: ret
+}
diff --git a/test/CodeGen/builtins-ppc-p8vector.c b/test/CodeGen/builtins-ppc-p8vector.c
index ac40790..61e14ba 100644
--- a/test/CodeGen/builtins-ppc-p8vector.c
+++ b/test/CodeGen/builtins-ppc-p8vector.c
@@ -1,7 +1,11 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -faltivec -target-feature +power8-vector -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-LE
-// RUN: not %clang_cc1 -faltivec -triple powerpc64-unknown-unknown -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PPC
+// RUN: not %clang_cc1 -faltivec -target-feature +vsx -triple powerpc64-unknown-unknown -emit-llvm %s -o - 2>&1 | FileCheck %s -check-prefix=CHECK-PPC
+// Added -target-feature +vsx above to avoid errors about "vector double" and to
+// generate the correct errors for functions that are only overloaded with VSX
+// (vec_cmpge, vec_cmple). Without this option, there is only one overload so
+// it is selected.
 
 vector signed char vsc = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5 };
 vector unsigned char vuc = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5 };
@@ -11,6 +15,7 @@ vector bool int vbi = {0, -1, -1, 0};
 vector bool long long vbll = { 1, 0 };
 vector signed long long vsll = { 1, 2 };
 vector unsigned long long vull = { 1, 2 };
+vector double vda = { 1.e-11, -132.23e10 };
 
 int res_i;
 vector signed char res_vsc;
@@ -21,10 +26,63 @@ vector bool int res_vbi;
 vector bool long long res_vbll;
 vector signed long long res_vsll;
 vector unsigned long long res_vull;
+vector double res_vd;
 
 // CHECK-LABEL: define void @test1
 void test1() {
 
+  /* vec_abs */
+  res_vsll = vec_abs(vsll);
+// CHECK: call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %{{[0-9]*}}, <2 x i64>
+// CHECK-LE: call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %{{[0-9]*}}, <2 x i64>
+// CHECK-PPC: error: call to 'vec_abs' is ambiguous
+
+  res_vd = vec_abs(vda);
+// CHECK: store <2 x i64> <i64 9223372036854775807, i64 9223372036854775807>, <2 x i64>*
+// CHECK: and <2 x i64>
+// CHECK-LE: store <2 x i64> <i64 9223372036854775807, i64 9223372036854775807>, <2 x i64>*
+// CHECK-LE: and <2 x i64>
+// CHECK-PPC: error: call to 'vec_abs' is ambiguous
+
+  /* vec_add */
+  res_vsll = vec_add(vsll, vsll);
+// CHECK: add <2 x i64>
+// CHECK-LE: add <2 x i64>
+// CHECK-PPC: error: call to 'vec_add' is ambiguous
+
+  res_vull = vec_add(vull, vull);
+// CHECK: add <2 x i64>
+// CHECK-LE: add <2 x i64>
+// CHECK-PPC: error: call to 'vec_add' is ambiguous
+
+  /* vec_mergee */  
+  res_vbi = vec_mergee(vbi, vbi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+  
+  res_vi = vec_mergee(vi, vi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vui = vec_mergee(vui, vui);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+// CHECK-PPC: warning: implicit declaration of function 'vec_mergee'
+
+  /* vec_mergeo */
+  res_vbi = vec_mergeo(vbi, vbi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vi = vec_mergeo(vi, vi);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+
+  res_vui = vec_mergeo(vui, vui);
+// CHECK: @llvm.ppc.altivec.vperm
+// CHECK-LE: @llvm.ppc.altivec.vperm
+// CHECK-PPC: warning: implicit declaration of function 'vec_mergeo'
+  
   /* vec_cmpeq */
   res_vbll = vec_cmpeq(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpequd
@@ -36,6 +94,28 @@ void test1() {
 // CHECK-LE: @llvm.ppc.altivec.vcmpequd
 // CHECK-PPC: error: call to 'vec_cmpeq' is ambiguous
 
+  /* vec_cmpge */
+  res_vbll = vec_cmpge(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-PPC: error: call to 'vec_cmpge' is ambiguous
+
+  res_vbll = vec_cmpge(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud
+// CHECK-PPC: error: call to 'vec_cmpge' is ambiguous
+
+  /* vec_cmple */
+  res_vbll = vec_cmple(vsll, vsll);
+// CHECK: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtsd
+// CHECK-PPC: error: call to 'vec_cmple' is ambiguous
+
+  res_vbll = vec_cmple(vull, vull);
+// CHECK: @llvm.ppc.altivec.vcmpgtud
+// CHECK-LE: @llvm.ppc.altivec.vcmpgtud
+// CHECK-PPC: error: call to 'vec_cmple' is ambiguous
+
   /* vec_cmpgt */
   res_vbll = vec_cmpgt(vsll, vsll);
 // CHECK: @llvm.ppc.altivec.vcmpgtsd
@@ -47,6 +127,17 @@ void test1() {
 // CHECK-LE: @llvm.ppc.altivec.vcmpgtud
 // CHECK-PPC: error: call to 'vec_cmpgt' is ambiguous
 
+  /* vec_cmplt */
+  res_vbll = vec_cmplt(vsll, vsll);
+// CHECK: call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-PPC: error: call to 'vec_cmplt' is ambiguous
+
+  res_vbll = vec_cmplt(vull, vull);
+// CHECK: call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-LE: call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %{{[0-9]*}}, <2 x i64> %{{[0-9]*}})
+// CHECK-PPC: error: call to 'vec_cmplt' is ambiguous
+
   /* ----------------------- predicates --------------------------- */
   /* vec_all_eq */
   res_i = vec_all_eq(vsll, vsll);
diff --git a/test/CodeGen/builtins-ppc-vsx.c b/test/CodeGen/builtins-ppc-vsx.c
index 631cb6c..9936213 100644
--- a/test/CodeGen/builtins-ppc-vsx.c
+++ b/test/CodeGen/builtins-ppc-vsx.c
@@ -1,5 +1,6 @@
 // REQUIRES: powerpc-registered-target
 // RUN: %clang_cc1 -faltivec -target-feature +vsx -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -faltivec -target-feature +vsx -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 
 vector unsigned char vuc = { 8,  9, 10, 11, 12, 13, 14, 15,
                              0,  1,  2,  3,  4,  5,  6,  7};
@@ -16,14 +17,98 @@ vector float res_vf;
 vector double res_vd;
 vector signed int res_vsi;
 vector unsigned int res_vui;
+vector bool int res_vbi;
 vector bool long long res_vbll;
 vector signed long long res_vsll;
 vector unsigned long long res_vull;
 double res_d;
 
+void dummy() { }
+
 void test1() {
 // CHECK-LABEL: define void @test1
 
+  res_vd = vec_add(vd, vd);
+// CHECK: fadd <2 x double>
+
+  res_vd = vec_and(vbll, vd);
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  res_vd = vec_and(vd, vbll);
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  res_vd = vec_and(vd, vd);
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_andc(vbll, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_andc(vd, vbll);
+// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_andc(vd, vd);
+// CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: and <2 x i64>
+// CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
+
+  dummy();
+// CHECK: call void @dummy()
+
+  res_vd = vec_ceil(vd);
+// CHECK: call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{[0-9]*}})
+
+  res_vf = vec_ceil(vf);
+// CHECK: call <4 x float> @llvm.ceil.v4f32(<4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmpeq(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpeqdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmpeq(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpeqsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmpge(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmpge(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmpgt(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmpgt(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmple(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgedp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmple(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgesp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
+  res_vbll = vec_cmplt(vd, vd);
+// CHECK: call <2 x i64> @llvm.ppc.vsx.xvcmpgtdp(<2 x double> %{{[0-9]*}}, <2 x double> %{{[0-9]*}})
+
+  res_vbi = vec_cmplt(vf, vf);
+// CHECK: call <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float> %{{[0-9]*}}, <4 x float> %{{[0-9]*}})
+
   /* vec_div */
   res_vf = vec_div(vf, vf);
 // CHECK: @llvm.ppc.vsx.xvdivsp
diff --git a/test/CodeGen/builtins-x86.c b/test/CodeGen/builtins-x86.c
index 8a5b5a2..a239889 100644
--- a/test/CodeGen/builtins-x86.c
+++ b/test/CodeGen/builtins-x86.c
@@ -260,6 +260,10 @@ void f0() {
 
   (void) __builtin_ia32_ldmxcsr(tmp_Ui);
   tmp_Ui = __builtin_ia32_stmxcsr();
+  (void)__builtin_ia32_fxsave(tmp_vp);
+  (void)__builtin_ia32_fxsave64(tmp_vp);
+  (void)__builtin_ia32_fxrstor(tmp_vp);
+  (void)__builtin_ia32_fxrstor64(tmp_vp);
   tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
   tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
   tmp_i = __builtin_ia32_cvtss2si(tmp_V4f);
diff --git a/test/CodeGen/builtinshufflevector2.c b/test/CodeGen/builtinshufflevector2.c
index 8712c99..0e7b2ec 100644
--- a/test/CodeGen/builtinshufflevector2.c
+++ b/test/CodeGen/builtinshufflevector2.c
@@ -3,7 +3,7 @@
 typedef float float4 __attribute__((ext_vector_type(4)));
 typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
 
-// CHECK-LABEL: define void @clang_shufflevector_v_v(
+// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v(
 void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) {
 // CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, <i32 3, i32 3, i32 3, i32 3>
 // CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 0
@@ -27,14 +27,14 @@ void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) {
   *A = __builtin_shufflevector( x, mask );
 }
 
-// CHECK-LABEL: define void @clang_shufflevector_v_v_c(
+// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v_c(
 void clang_shufflevector_v_v_c( float4* A, float4 x, float4 y) {
 // CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 // CHECK: store <4 x float> [[V]], <4 x float>* {{%.*}}
   *A = __builtin_shufflevector( x, y, 0, 4, 1, 5 );
 }
 
-// CHECK-LABEL: define void @clang_shufflevector_v_v_undef(
+// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v_undef(
 void clang_shufflevector_v_v_undef( float4* A, float4 x, float4 y) {
 // CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 undef, i32 5>
 // CHECK: store <4 x float> [[V]], <4 x float>* {{%.*}}
diff --git a/test/CodeGen/c-strings.c b/test/CodeGen/c-strings.c
index 588a716..974eeea 100644
--- a/test/CodeGen/c-strings.c
+++ b/test/CodeGen/c-strings.c
@@ -23,44 +23,44 @@ unsigned char align = 1;
 
 void bar(const char *);
 
-// CHECK-LABEL: define void @f0()
+// CHECK-LABEL: define {{.*}}void @f0()
 void f0() {
   bar("hello");
-  // ITANIUM: call void @bar({{.*}} @.str
-  // MSABI: call void @bar({{.*}} @"\01??_C@_05CJBACGMB@hello?$AA@"
+  // ITANIUM: call {{.*}}void @bar({{.*}} @.str
+  // MSABI: call {{.*}}void @bar({{.*}} @"\01??_C@_05CJBACGMB@hello?$AA@"
 }
 
-// CHECK-LABEL: define void @f1()
+// CHECK-LABEL: define {{.*}}void @f1()
 void f1() {
   static char *x = "hello";
   bar(x);
   // CHECK: [[T1:%.*]] = load i8*, i8** @f1.x
-  // CHECK: call void @bar(i8* [[T1:%.*]])
+  // CHECK: call {{.*}}void @bar(i8* [[T1:%.*]])
 }
 
-// CHECK-LABEL: define void @f2()
+// CHECK-LABEL: define {{.*}}void @f2()
 void f2() {
   static char x[] = "hello";
   bar(x);
-  // CHECK: call void @bar({{.*}} @f2.x
+  // CHECK: call {{.*}}void @bar({{.*}} @f2.x
 }
 
-// CHECK-LABEL: define void @f3()
+// CHECK-LABEL: define {{.*}}void @f3()
 void f3() {
   static char x[8] = "hello";
   bar(x);
-  // CHECK: call void @bar({{.*}} @f3.x
+  // CHECK: call {{.*}}void @bar({{.*}} @f3.x
 }
 
 void gaz(void *);
 
-// CHECK-LABEL: define void @f4()
+// CHECK-LABEL: define {{.*}}void @f4()
 void f4() {
   static struct s {
     char *name;
   } x = { "hello" };
   gaz(&x);
-  // CHECK: call void @gaz({{.*}} @f4.x
+  // CHECK: call {{.*}}void @gaz({{.*}} @f4.x
 }
 
 char x[3] = "ola";
diff --git a/test/CodeGen/c11atomics.c b/test/CodeGen/c11atomics.c
index a35eef9..d1e4478 100644
--- a/test/CodeGen/c11atomics.c
+++ b/test/CodeGen/c11atomics.c
@@ -12,8 +12,24 @@
 // they're sufficiently rare that it's not worth making sure that the semantics
 // are correct.
 
-// CHECK: @testStructGlobal = global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
-// CHECK: @testPromotedStructGlobal = global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
+struct elem;
+
+struct ptr {
+    struct elem *ptr;
+};
+// CHECK-DAG: %struct.ptr = type { %struct.elem* }
+
+struct elem {
+    _Atomic(struct ptr) link;
+};
+// CHECK-DAG: %struct.elem = type { %struct.ptr }
+
+struct ptr object;
+// CHECK-DAG: @object = common global %struct.ptr zeroinitializer
+
+// CHECK-DAG: @testStructGlobal = global {{.*}} { i16 1, i16 2, i16 3, i16 4 }
+// CHECK-DAG: @testPromotedStructGlobal = global {{.*}} { %{{.*}} { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
+
 
 typedef int __attribute__((vector_size(16))) vector;
 
diff --git a/test/CodeGen/call.c b/test/CodeGen/call.c
index 7239111..eec6e52 100644
--- a/test/CodeGen/call.c
+++ b/test/CodeGen/call.c
@@ -18,7 +18,7 @@ void JS_ReportErrorNumber(JSErrorCallback errorCallback, ...);
 void Interpret() {
   JS_ReportErrorNumber(js_GetErrorMessage, 0);
   
-  // CHECK: call void ({{.*}}, ...) @JS_ReportErrorNumber({{.*}}@js_GetErrorMessage
+  // CHECK: call {{.*}}void ({{.*}}, ...) @JS_ReportErrorNumber({{.*}}@js_GetErrorMessage
 }
 
 
diff --git a/test/CodeGen/captured-statements-nested.c b/test/CodeGen/captured-statements-nested.c
index 6464243..b81705b 100644
--- a/test/CodeGen/captured-statements-nested.c
+++ b/test/CodeGen/captured-statements-nested.c
@@ -30,7 +30,7 @@ void test_nest_captured_stmt(int param, int size, int param_arr[size]) {
         param_arr[size - 1] = 2;
         arr[10][z.a] = 12;
 
-        // CHECK1: define internal void @__captured_stmt{{.*}}([[T]]
+        // CHECK1: define internal {{.*}}void @__captured_stmt{{.*}}([[T]]
         // CHECK1: [[PARAM_ARR_SIZE_REF:%.+]] = getelementptr inbounds [[T]], [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
         // CHECK1: [[PARAM_ARR_SIZE:%.+]] = load [[SIZE_TYPE]], [[SIZE_TYPE]]* [[PARAM_ARR_SIZE_REF]]
         // CHECK1: [[ARR_SIZE1_REF:%.+]] = getelementptr inbounds [[T]], [[T]]* {{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 8
@@ -106,7 +106,7 @@ void test_nest_block() {
     }
   }();
 
-  // CHECK2: define internal void @{{.*}}test_nest_block_block_invoke
+  // CHECK2: define internal {{.*}}void @{{.*}}test_nest_block_block_invoke
   //
   // CHECK2: [[Z:%[0-9a-z_]*]] = alloca i{{[0-9]+}},
   // CHECK2: alloca %struct.anon{{.*}}
diff --git a/test/CodeGen/captured-statements.c b/test/CodeGen/captured-statements.c
index b4fbfd4..53632ac 100644
--- a/test/CodeGen/captured-statements.c
+++ b/test/CodeGen/captured-statements.c
@@ -28,7 +28,7 @@ void test1() {
   // CHECK-1: call void @[[HelperName:__captured_stmt[\.0-9]+]]
 }
 
-// CHECK-1: define internal void @[[HelperName]](%struct.anon
+// CHECK-1: define internal {{.*}}void @[[HelperName]](%struct.anon
 // CHECK-1:   getelementptr inbounds %struct.anon{{.*}}, i32 0, i32 0
 // CHECK-1:   load i32*, i32**
 // CHECK-1:   load i32, i32*
@@ -48,7 +48,7 @@ void test2(int x) {
   // CHECK-2: call void @[[HelperName:__captured_stmt[\.0-9]+]]
 }
 
-// CHECK-2: define internal void @[[HelperName]]
+// CHECK-2: define internal {{.*}}void @[[HelperName]]
 // CHECK-2-NOT: }
 // CHECK-2:   %i = alloca i32
 
@@ -93,7 +93,7 @@ void dont_capture_global() {
   // CHECK-GLOBALS: call void @__captured_stmt[[HelperName:[\.0-9]+]](%[[Capture]]
 }
 
-// CHECK-GLOBALS: define internal void @__captured_stmt[[HelperName]]
+// CHECK-GLOBALS: define internal {{.*}}void @__captured_stmt[[HelperName]]
 // CHECK-GLOBALS-NOT: ret
 // CHECK-GLOBALS:   load i32, i32* @global
 // CHECK-GLOBALS:   load i32, i32* @
diff --git a/test/CodeGen/complex-convert.c b/test/CodeGen/complex-convert.c
index 0db2588..c65a98c 100644
--- a/test/CodeGen/complex-convert.c
+++ b/test/CodeGen/complex-convert.c
@@ -21,7 +21,7 @@ void foo(signed char sc, unsigned char uc, signed long long sll,
   _Complex unsigned char cuc1;
   _Complex signed long long csll1;
   _Complex unsigned long long cull1;
-  // CHECK-LABEL: define void @foo(
+  // CHECK-LABEL: define {{.*}}void @foo(
   // Match the prototype to pick up the size of sc and sll.
   // CHECK: i[[CHSIZE:[0-9]+]]{{[^,]*}},
   // CHECK: i[[CHSIZE]]{{[^,]*}},
diff --git a/test/CodeGen/debug-info-packed-struct.c b/test/CodeGen/debug-info-packed-struct.c
new file mode 100644
index 0000000..0b5226b
--- /dev/null
+++ b/test/CodeGen/debug-info-packed-struct.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -x c -g -emit-llvm -triple x86_64-apple-darwin -o - %s | FileCheck %s
+
+// CHECK: %struct.layout0 = type { i8, %struct.size8, i8 }
+// CHECK: %struct.layout1 = type <{ i8, %struct.size8_anon, i8, [2 x i8] }>
+// CHECK: %struct.layout2 = type <{ i8, %struct.size8_pack1, i8 }>
+// CHECK: %struct.layout3 = type <{ i8, [3 x i8], %struct.size8_pack4, i8, [3 x i8] }>
+
+// ---------------------------------------------------------------------
+// Not packed.
+// ---------------------------------------------------------------------
+struct size8 {
+  int i : 4;
+  long long l : 60;
+};
+struct layout0 {
+  char l0_ofs0;
+  struct size8 l0_ofs8;
+  int l0_ofs16 : 1;
+};
+// CHECK: l0_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l0_ofs8",
+// CHECK-SAME:     {{.*}}size: 64, align: 64, offset: 64)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l0_ofs16",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 128)
+
+
+// ---------------------------------------------------------------------
+// Implicitly packed.
+// ---------------------------------------------------------------------
+struct size8_anon {
+  int : 4;
+  long long : 60;
+};
+struct layout1 {
+  char l1_ofs0;
+  struct size8_anon l1_ofs1;
+  int l1_ofs9 : 1;
+};
+// CHECK: l1_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l1_ofs1",
+// CHECK-SAME:     {{.*}}size: 64, align: 8, offset: 8)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l1_ofs9",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 72)
+
+
+// ---------------------------------------------------------------------
+// Explicitly packed.
+// ---------------------------------------------------------------------
+#pragma pack(1)
+struct size8_pack1 {
+  int i : 4;
+  long long l : 60;
+};
+struct layout2 {
+  char l2_ofs0;
+  struct size8_pack1 l2_ofs1;
+  int l2_ofs9 : 1;
+};
+#pragma pack()
+// CHECK: l2_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l2_ofs1",
+// CHECK-SAME:     {{.*}}size: 64, align: 8, offset: 8)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l2_ofs9",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 72)
+
+
+
+// ---------------------------------------------------------------------
+// Explicitly packed with different alignment.
+// ---------------------------------------------------------------------
+#pragma pack(4)
+struct size8_pack4 {
+  int i : 4;
+  long long l : 60;
+};
+struct layout3 {
+  char l3_ofs0;
+  struct size8_pack4 l3_ofs4;
+  int l3_ofs12 : 1;
+};
+#pragma pack()
+// CHECK: l3_ofs0
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l3_ofs4",
+// CHECK-SAME:     {{.*}}size: 64, align: 32, offset: 32)
+// CHECK: !DIDerivedType(tag: DW_TAG_member, name: "l3_ofs12",
+// CHECK-SAME:     {{.*}}size: 1, align: 32, offset: 96)
+
+struct layout0 l0;
+struct layout1 l1;
+struct layout2 l2;
+struct layout3 l3;
diff --git a/test/CodeGen/dostmt.c b/test/CodeGen/dostmt.c
index 54973dc..67ef64e 100644
--- a/test/CodeGen/dostmt.c
+++ b/test/CodeGen/dostmt.c
@@ -71,6 +71,6 @@ void test6f(void);
 void test6() {
   do {
   } while (test6f(), 0);
-  // CHECK: call void @test6f()
+  // CHECK: call {{.*}}void @test6f()
 }
 
diff --git a/test/CodeGen/fast-math.c b/test/CodeGen/fast-math.c
index 4a51358..6f98b84 100644
--- a/test/CodeGen/fast-math.c
+++ b/test/CodeGen/fast-math.c
@@ -2,7 +2,7 @@
 float f0, f1, f2;
 
 void foo(void) {
-  // CHECK-LABEL: define void @foo()
+  // CHECK-LABEL: define {{.*}}void @foo()
 
   // CHECK: fadd fast
   f0 = f1 + f2;
diff --git a/test/CodeGen/finite-math.c b/test/CodeGen/finite-math.c
index 8365b56..90a83fa 100644
--- a/test/CodeGen/finite-math.c
+++ b/test/CodeGen/finite-math.c
@@ -5,7 +5,7 @@
 float f0, f1, f2;
 
 void foo(void) {
-  // CHECK-LABEL: define void @foo()
+  // CHECK-LABEL: define {{.*}}void @foo()
 
   // FINITE: fadd nnan ninf
   // NSZ: fadd nsz
diff --git a/test/CodeGen/linkage-redecl.c b/test/CodeGen/linkage-redecl.c
index 58993f3..4767a94 100644
--- a/test/CodeGen/linkage-redecl.c
+++ b/test/CodeGen/linkage-redecl.c
@@ -16,4 +16,4 @@ void g0() {
 }
 
 extern void f(int x) { } // still has internal linkage
-// CHECK-LABEL: define internal void @f
+// CHECK-LABEL: define internal {{.*}}void @f
diff --git a/test/CodeGen/nomathbuiltin.c b/test/CodeGen/nomathbuiltin.c
index f80cd91..35e7c56 100644
--- a/test/CodeGen/nomathbuiltin.c
+++ b/test/CodeGen/nomathbuiltin.c
@@ -7,6 +7,6 @@ double pow(double, double);
 
 double foo(double a, double b) {
   return pow(a, b);
-// CHECK: call double @pow
+// CHECK: call {{.*}}double @pow
 }
 
diff --git a/test/CodeGen/openmp_default_simd_align.c b/test/CodeGen/openmp_default_simd_align.c
new file mode 100644
index 0000000..dcab5ab
--- /dev/null
+++ b/test/CodeGen/openmp_default_simd_align.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s
+
+enum e0 { E0 };
+struct s0 {
+  enum e0         a:31;
+};
+
+int f0() {
+  return __builtin_omp_required_simd_align(struct s0);
+  // CHECK: ret i32 16
+}
diff --git a/test/CodeGen/pr9614.c b/test/CodeGen/pr9614.c
index 53abef1..63cb5af 100644
--- a/test/CodeGen/pr9614.c
+++ b/test/CodeGen/pr9614.c
@@ -4,26 +4,42 @@ extern void foo_alias (void) __asm ("foo");
 inline void foo (void) {
   return foo_alias ();
 }
-extern void bar_alias (void) __asm ("bar");
-inline __attribute__ ((__always_inline__)) void bar (void) {
-  return bar_alias ();
+extern int abs_alias (int) __asm ("abs");
+inline __attribute__ ((__always_inline__)) int abs (int x) {
+  return abs_alias(x);
 }
 extern char *strrchr_foo (const char *__s, int __c)  __asm ("strrchr");
 extern inline __attribute__ ((__always_inline__)) __attribute__ ((__gnu_inline__)) char * strrchr_foo (const char *__s, int __c)  {
   return __builtin_strrchr (__s, __c);
 }
+
+extern inline void __attribute__((always_inline, __gnu_inline__))
+prefetch(void) {
+  __builtin_prefetch(0, 0, 1);
+}
+
+extern inline __attribute__((__always_inline__, __gnu_inline__)) void *memchr(void *__s, int __c, __SIZE_TYPE__ __n) {
+  return __builtin_memchr(__s, __c, __n);
+}
+
 void f(void) {
   foo();
-  bar();
+  abs(0);
   strrchr_foo("", '.');
+  prefetch();
+  memchr("", '.', 0);
 }
 
 // CHECK-LABEL: define void @f()
 // CHECK: call void @foo()
-// CHECK-NEXT: call void @bar()
-// CHECK-NEXT: call i8* @strrchr(
-// CHECK-NEXT: ret void
+// CHECK: call i32 @abs(i32 0)
+// CHECK: call i8* @strrchr(
+// CHECK: call void @llvm.prefetch(
+// CHECK: call i8* @memchr(
+// CHECK: ret void
 
 // CHECK: declare void @foo()
-// CHECK: declare void @bar()
+// CHECK: declare i32 @abs(i32
 // CHECK: declare i8* @strrchr(i8*, i32)
+// CHECK: declare i8* @memchr(
+// CHECK: declare void @llvm.prefetch(
diff --git a/test/CodeGen/redefine_extname.c b/test/CodeGen/redefine_extname.c
index a91e5b8..ad4106d 100644
--- a/test/CodeGen/redefine_extname.c
+++ b/test/CodeGen/redefine_extname.c
@@ -13,3 +13,14 @@ int fish() { return fake() + __PRAGMA_REDEFINE_EXTNAME + name; }
 // CHECK:   call i32 @real()
 // Check that this also works with variables names
 // CHECK:   load i32, i32* @alias
+
+// This is a case when redefenition is deferred *and* we have a local of the
+// same name. PR23923.
+#pragma redefine_extname foo bar
+int f() {
+  int foo = 0;
+  return foo;
+}
+extern int foo() { return 1; }
+// CHECK: define i32 @bar()
+
diff --git a/test/CodeGen/stack-protector.c b/test/CodeGen/stack-protector.c
index 8039b60..ecfbc90 100644
--- a/test/CodeGen/stack-protector.c
+++ b/test/CodeGen/stack-protector.c
@@ -1,13 +1,13 @@
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 0 | FileCheck -check-prefix=NOSSP %s
-// NOSSP: define void @test1(i8* %msg) #0 {
+// NOSSP: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 1 | FileCheck -check-prefix=WITHSSP %s
-// WITHSSP: define void @test1(i8* %msg) #0 {
+// WITHSSP: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 2 | FileCheck -check-prefix=SSPSTRONG %s
-// SSPSTRONG: define void @test1(i8* %msg) #0 {
+// SSPSTRONG: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -stack-protector 3 | FileCheck -check-prefix=SSPREQ %s
-// SSPREQ: define void @test1(i8* %msg) #0 {
+// SSPREQ: define {{.*}}void @test1(i8* %msg) #0 {
 // RUN: %clang_cc1 -emit-llvm -o - %s -fsanitize=safe-stack | FileCheck -check-prefix=SAFESTACK %s
-// SAFESTACK: define void @test1(i8* %msg) #0 {
+// SAFESTACK: define {{.*}}void @test1(i8* %msg) #0 {
 
 typedef __SIZE_TYPE__ size_t;
 
diff --git a/test/CodeGen/static-order.c b/test/CodeGen/static-order.c
index 58aabbe..20277d7 100644
--- a/test/CodeGen/static-order.c
+++ b/test/CodeGen/static-order.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
 // CHECK: ModuleID
 // CHECK-NOT: zeroinitializer
-// CHECK-LABEL: define i8* @f
+// CHECK-LABEL: define {{.*}}i8* @f
 
 struct s {
     int a;
diff --git a/test/CodeGen/ubsan-blacklist.c b/test/CodeGen/ubsan-blacklist.c
index 6c67f02..f6e3882 100644
--- a/test/CodeGen/ubsan-blacklist.c
+++ b/test/CodeGen/ubsan-blacklist.c
@@ -14,9 +14,9 @@ unsigned i;
 // FUNC: @hash
 // FILE: @hash
 unsigned hash() {
-// DEFAULT: call void @__ubsan
-// FUNC-NOT: call void @__ubsan
-// FILE-NOT: call void @__ubsan
+// DEFAULT: call {{.*}}void @__ubsan
+// FUNC-NOT: call {{.*}}void @__ubsan
+// FILE-NOT: call {{.*}}void @__ubsan
   return i * 37;
 }
 
@@ -24,8 +24,8 @@ unsigned hash() {
 // FUNC: @add
 // FILE: @add
 unsigned add() {
-// DEFAULT: call void @__ubsan
-// FUNC: call void @__ubsan
-// FILE-NOT: call void @__ubsan
+// DEFAULT: call {{.*}}void @__ubsan
+// FUNC: call {{.*}}void @__ubsan
+// FILE-NOT: call {{.*}}void @__ubsan
   return i + 1;
 }
diff --git a/test/CodeGen/volatile-1.c b/test/CodeGen/volatile-1.c
index 71cd5f8..f63274b 100644
--- a/test/CodeGen/volatile-1.c
+++ b/test/CodeGen/volatile-1.c
@@ -22,7 +22,7 @@ int printf(const char *, ...);
 // that do implicit lvalue-to-rvalue conversion are substantially
 // reduced.
 
-// CHECK-LABEL: define void @test()
+// CHECK-LABEL: define {{.*}}void @test()
 void test() {
   // CHECK: load volatile [[INT]], [[INT]]* @i
   i;
@@ -303,11 +303,11 @@ void test() {
 }
 
 extern volatile enum X x;
-// CHECK-LABEL: define void @test1()
+// CHECK-LABEL: define {{.*}}void @test1()
 void test1() {
   extern void test1_helper(void);
   test1_helper();
-  // CHECK: call void @test1_helper()
+  // CHECK: call {{.*}}void @test1_helper()
   // CHECK-NEXT: ret void
   x;
   (void) x;
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index c412e3c..bb9fba1 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -1,7 +1,9 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
-// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
-// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
+// RUN:   FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
 #include <stdarg.h>
 
 // CHECK-LABEL: define signext i8 @f0()
@@ -458,3 +460,77 @@ void test54() {
 }
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+
+typedef float __m512 __attribute__ ((__vector_size__ (64)));
+typedef struct {
+  __m512 m;
+} s512;
+
+s512 x55;
+__m512 x56;
+
+// Even on AVX512, aggregates of size larger than four eightbytes have class
+// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
+//
+// CHECK: declare void @f55(%struct.s512* byval align 64)
+void f55(s512 x);
+
+// However, __m512 has type SSE/SSEUP on AVX512.
+//
+// AVX512: declare void @f56(<16 x float>)
+// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
+void f56(__m512 x);
+void f57() { f55(x55); f56(x56); }
+
+// Like for __m128 on AVX, check that the struct below is passed
+// in the same way regardless of AVX512 being used.
+//
+// CHECK: declare void @f58(%struct.t256* byval align 32)
+typedef struct t256 {
+  __m256 m;
+  __m256 n;
+} two256;
+
+extern void f58(two256 s);
+void f59(two256 s) {
+  f58(s);
+}
+
+// CHECK: declare void @f60(%struct.sat256* byval align 32)
+typedef struct at256 {
+  __m256 array[2];
+} Atwo256;
+typedef struct sat256 {
+  Atwo256 x;
+} SAtwo256;
+
+extern void f60(SAtwo256 s);
+void f61(SAtwo256 s) {
+  f60(s);
+}
+
+// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+void f62_helper(int, ...);
+__m512 x62;
+void f62() {
+  f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
+
+// Like for __m256 on AVX, we always pass __m512 in memory, and don't
+// need to use the register save area.
+//
+// AVX512-LABEL: define void @f63
+// AVX512-NOT: br i1
+// AVX512: ret void
+void f63(__m512 *m, __builtin_va_list argList) {
+  *m = __builtin_va_arg(argList, __m512);
+}
+
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
+void f64_helper(__m512, ...);
+__m512 x64;
+void f64() {
+  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+  f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
diff --git a/test/CodeGenCXX/PR5093-static-member-function.cpp b/test/CodeGenCXX/PR5093-static-member-function.cpp
index d61a87a..6811351 100644
--- a/test/CodeGenCXX/PR5093-static-member-function.cpp
+++ b/test/CodeGenCXX/PR5093-static-member-function.cpp
@@ -4,6 +4,6 @@ struct a {
 };
 
 void g(a *a) {
-  // CHECK: call void @_ZN1a1fEv()
+  // CHECK: call {{.*}}void @_ZN1a1fEv()
   a->f();
 }
diff --git a/test/CodeGenCXX/address-of-fntemplate.cpp b/test/CodeGenCXX/address-of-fntemplate.cpp
index 4ff597a..4840fe8 100644
--- a/test/CodeGenCXX/address-of-fntemplate.cpp
+++ b/test/CodeGenCXX/address-of-fntemplate.cpp
@@ -9,8 +9,8 @@ void test() {
   // CHECK: @_Z1fIiEvv
   void (*p2)() = f<int>;
 }
-// CHECK-LABEL: define linkonce_odr void @_Z1fIiEvT_
-// CHECK-LABEL: define linkonce_odr void @_Z1fIiEvv
+// CHECK-LABEL: define linkonce_odr {{.*}}void @_Z1fIiEvT_
+// CHECK-LABEL: define linkonce_odr {{.*}}void @_Z1fIiEvv
 
 namespace PR6973 {
   template<typename T>
diff --git a/test/CodeGenCXX/attr-cleanup.cpp b/test/CodeGenCXX/attr-cleanup.cpp
index 18a7798..5ece41a 100644
--- a/test/CodeGenCXX/attr-cleanup.cpp
+++ b/test/CodeGenCXX/attr-cleanup.cpp
@@ -5,7 +5,7 @@ namespace N {
 }
 
 int main(void) {
-  // CHECK: call void @_ZN1N4freeEPv
+  // CHECK: call {{.*}}void @_ZN1N4freeEPv
   void *fp __attribute__((cleanup(N::free)));
   return 0;
 }
diff --git a/test/CodeGenCXX/block-byref-cxx-objc.cpp b/test/CodeGenCXX/block-byref-cxx-objc.cpp
index 5c35ad7..ce1ebd6 100644
--- a/test/CodeGenCXX/block-byref-cxx-objc.cpp
+++ b/test/CodeGenCXX/block-byref-cxx-objc.cpp
@@ -20,9 +20,9 @@ int main()
 // CHECK-LABEL: define internal void @__Block_byref_object_dispose_
 // CHECK: call {{.*}} @_ZN1AD1Ev
 // CHECK-LABEL: define internal void @__copy_helper_block_
-// CHECK: call void @_Block_object_assign
+// CHECK: call {{.*}}void @_Block_object_assign
 // CHECK-LABEL: define internal void @__destroy_helper_block_
-// CHECK: call void @_Block_object_dispose
+// CHECK: call {{.*}}void @_Block_object_dispose
 
 // rdar://problem/11135650
 namespace test1 {
diff --git a/test/CodeGenCXX/c-linkage.cpp b/test/CodeGenCXX/c-linkage.cpp
index 2f8729e..a70a22e 100644
--- a/test/CodeGenCXX/c-linkage.cpp
+++ b/test/CodeGenCXX/c-linkage.cpp
@@ -15,10 +15,10 @@ extern "C" {
 extern "C" {
   static void test2_f() {
   }
-  // CHECK-LABEL: define internal void @_Z7test2_fv
+  // CHECK-LABEL: define internal {{.*}}void @_Z7test2_fv
   static void test2_f(int x) {
   }
-  // CHECK-LABEL: define internal void @_Z7test2_fi
+  // CHECK-LABEL: define internal {{.*}}void @_Z7test2_fi
   void test2_use() {
     test2_f();
     test2_f(42);
diff --git a/test/CodeGenCXX/captured-statements.cpp b/test/CodeGenCXX/captured-statements.cpp
index ebb3833..fdda24f 100644
--- a/test/CodeGenCXX/captured-statements.cpp
+++ b/test/CodeGenCXX/captured-statements.cpp
@@ -44,11 +44,11 @@ void test1() {
   // CHECK-1:   ret
 }
 
-// CHECK-1: define internal void @[[HelperName]]
+// CHECK-1: define internal {{.*}}void @[[HelperName]]
 // CHECK-1:   getelementptr inbounds %[[Capture]], %[[Capture]]* {{[^,]*}}, i32 0, i32 0
-// CHECK-1:   call i32 @__cxa_guard_acquire(
+// CHECK-1:   call {{.*}}i32 @__cxa_guard_acquire(
 // CHECK-1:   store double %{{.+}}, double* [[INNER]],
-// CHECK-1:   call void @__cxa_guard_release(
+// CHECK-1:   call {{.*}}void @__cxa_guard_release(
 // CHECK-1:   getelementptr inbounds %struct.TestClass, %struct.TestClass* {{[^,]*}}, i32 0, i32 0
 // CHECK-1:   getelementptr inbounds %[[Capture]], %[[Capture]]* {{[^,]*}}, i32 0, i32 1
 
@@ -61,13 +61,13 @@ void test2(int x) {
     return x;
   }();
 
-  // CHECK-2-LABEL: define void @_Z5test2i
+  // CHECK-2-LABEL: define {{.*}}void @_Z5test2i
   // CHECK-2:   call {{.*}} @[[Lambda:["$\w]+]]
   //
   // CHECK-2: define internal {{.*}} @[[Lambda]]
   // CHECK-2:   call void @[[HelperName:["$_A-Za-z0-9]+]](%[[Capture:.*]]*
   //
-  // CHECK-2: define internal void @[[HelperName]]
+  // CHECK-2: define internal {{.*}}void @[[HelperName]]
   // CHECK-2:   getelementptr inbounds %[[Capture]], %[[Capture]]*
   // CHECK-2:   load i32*, i32**
   // CHECK-2:   load i32, i32*
@@ -81,7 +81,7 @@ void test3(int x) {
 
   // CHECK-3: %[[Capture:struct\.anon[\.0-9]*]] = type { i32* }
 
-  // CHECK-3-LABEL: define void @_Z5test3i
+  // CHECK-3-LABEL: define {{.*}}void @_Z5test3i
   // CHECK-3:   store i32*
   // CHECK-3:   call void @{{.*}}__captured_stmt
   // CHECK-3:   ret void
@@ -93,11 +93,11 @@ void test4() {
     Foo f;
     f.x = 5;
   }
-  // CHECK-4-LABEL: define void @_Z5test4v
+  // CHECK-4-LABEL: define {{.*}}void @_Z5test4v
   // CHECK-4:   call void @[[HelperName:[\."$_A-Za-z0-9]+]](%[[Capture:.*]]*
   // CHECK-4:   ret void
   //
-  // CHECK-4: define internal void @[[HelperName]]
+  // CHECK-4: define internal {{.*}}void @[[HelperName]]
   // CHECK-4:   store i32 5, i32*
   // CHECK-4:   call {{.*}}FooD1Ev
 }
diff --git a/test/CodeGenCXX/constructor-attr.cpp b/test/CodeGenCXX/constructor-attr.cpp
index 468ce36..ec27ed2 100644
--- a/test/CodeGenCXX/constructor-attr.cpp
+++ b/test/CodeGenCXX/constructor-attr.cpp
@@ -5,7 +5,7 @@
 // PR6521
 void bar();
 struct Foo {
-  // CHECK-LABEL: define linkonce_odr void @_ZN3Foo3fooEv
+  // CHECK-LABEL: define linkonce_odr {{.*}}void @_ZN3Foo3fooEv
   static void foo() __attribute__((constructor)) {
     bar();
   }
diff --git a/test/CodeGenCXX/ctor-globalopt.cpp b/test/CodeGenCXX/ctor-globalopt.cpp
index 26ec523..bcab609 100644
--- a/test/CodeGenCXX/ctor-globalopt.cpp
+++ b/test/CodeGenCXX/ctor-globalopt.cpp
@@ -11,8 +11,8 @@
 // CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] 
 // CHECK: [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_ctor_globalopt.cpp, i8* null }]
 
-// CHECK-LABEL: define internal void @_GLOBAL__sub_I_ctor_globalopt.cpp()
-// CHECK: call void @
+// CHECK-LABEL: define internal {{.*}}void @_GLOBAL__sub_I_ctor_globalopt.cpp()
+// CHECK: call {{.*}}void @
 // CHECK-NOT: call{{ }}
 
 // O1: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
diff --git a/test/CodeGenCXX/debug-info-line.cpp b/test/CodeGenCXX/debug-info-line.cpp
index 0b1b43b..7f8e117 100644
--- a/test/CodeGenCXX/debug-info-line.cpp
+++ b/test/CodeGenCXX/debug-info-line.cpp
@@ -158,7 +158,7 @@ __complex double f11() {
 void f12() {
   int f12_1();
   void f12_2(int = f12_1());
-// CHECK: call {{(signext )?}}i32 {{.*}} !dbg [[DBG_F12:!.*]]
+// CHECK: call {{.*}}{{(signext )?}}i32 {{.*}} !dbg [[DBG_F12:!.*]]
 #line 1400
   f12_2();
 }
diff --git a/test/CodeGenCXX/debug-info-method-nodebug.cpp b/test/CodeGenCXX/debug-info-method-nodebug.cpp
new file mode 100644
index 0000000..474053a
--- /dev/null
+++ b/test/CodeGenCXX/debug-info-method-nodebug.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -emit-llvm -g %s -o - | FileCheck %s
+
+class C {
+  void present();
+  void absent() __attribute__((nodebug));
+};
+
+C c;
+
+// CHECK-NOT: name: "absent"
+// CHECK:     name: "present"
+// CHECK-NOT: name: "absent"
diff --git a/test/CodeGenCXX/debug-info-namespace.cpp b/test/CodeGenCXX/debug-info-namespace.cpp
index d59b778..35b371e 100644
--- a/test/CodeGenCXX/debug-info-namespace.cpp
+++ b/test/CodeGenCXX/debug-info-namespace.cpp
@@ -55,7 +55,7 @@ void B::func_fwd() {}
 // This should work even if 'i' and 'func' were declarations & not definitions,
 // but it doesn't yet.
 
-// CHECK: [[CU:![0-9]+]] = !DICompileUnit(
+// CHECK: [[CU:![0-9]+]] = distinct !DICompileUnit(
 // CHECK-SAME:                            imports: [[MODULES:![0-9]*]]
 // CHECK: [[FOO:![0-9]+]] = !DICompositeType(tag: DW_TAG_structure_type, name: "foo",
 // CHECK-SAME:                               line: 5
@@ -102,7 +102,7 @@ void B::func_fwd() {}
 // CHECK: [[M16]] = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: [[FUNC]], entity: [[FUNC_FWD:![0-9]+]]
 // CHECK: [[M17]] = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: [[CTXT]], entity: [[I]]
 
-// CHECK-GMLT: [[CU:![0-9]+]] = !DICompileUnit(
+// CHECK-GMLT: [[CU:![0-9]+]] = distinct !DICompileUnit(
 // CHECK-GMLT-SAME:                            emissionKind: 2,
 // CHECK-GMLT-SAME:                            imports: [[MODULES:![0-9]+]]
 // CHECK-GMLT: [[MODULES]] = !{}
diff --git a/test/CodeGenCXX/deferred-global-init.cpp b/test/CodeGenCXX/deferred-global-init.cpp
index 920037c..e4c0d07 100644
--- a/test/CodeGenCXX/deferred-global-init.cpp
+++ b/test/CodeGenCXX/deferred-global-init.cpp
@@ -7,10 +7,10 @@ void* bar() { return a; }
 
 // CHECK: @_ZL1a = internal global i8* null
 
-// CHECK-LABEL: define internal void @__cxx_global_var_init
+// CHECK-LABEL: define internal {{.*}}void @__cxx_global_var_init
 // CHECK: load i8*, i8** @foo
 // CHECK: ret void
 
-// CHECK-LABEL: define internal void @_GLOBAL__sub_I_deferred_global_init.cpp
-// CHECK: call void @__cxx_global_var_init()
+// CHECK-LABEL: define internal {{.*}}void @_GLOBAL__sub_I_deferred_global_init.cpp
+// CHECK: call {{.*}}void @__cxx_global_var_init()
 // CHECK: ret void
diff --git a/test/CodeGenCXX/derived-to-virtual-base-class-calls-final.cpp b/test/CodeGenCXX/derived-to-virtual-base-class-calls-final.cpp
index dd64e81..3a4766d 100644
--- a/test/CodeGenCXX/derived-to-virtual-base-class-calls-final.cpp
+++ b/test/CodeGenCXX/derived-to-virtual-base-class-calls-final.cpp
@@ -9,7 +9,7 @@ struct D final : virtual C {
   virtual void f();
 };
 
-// CHECK-LABEL: define dereferenceable({{[0-9]+}}) %struct.B* @_Z1fR1D
+// CHECK-LABEL: define {{.*}}dereferenceable({{[0-9]+}}) %struct.B* @_Z1fR1D
 B &f(D &d) {
   // CHECK-NOT: load i8*, i8**
   return d;
diff --git a/test/CodeGenCXX/destructor-crash.cpp b/test/CodeGenCXX/destructor-crash.cpp
new file mode 100644
index 0000000..4329198
--- /dev/null
+++ b/test/CodeGenCXX/destructor-crash.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -emit-llvm -std=c++11 -o %t
+
+struct A {
+  ~A();
+};
+
+struct B {
+  A a;
+};
+
+struct C {
+  union {
+    B b;
+  };
+
+  ~C() noexcept;
+};
+
+C::~C() noexcept {}
diff --git a/test/CodeGenCXX/dllexport.cpp b/test/CodeGenCXX/dllexport.cpp
index 0eb6476..c598880 100644
--- a/test/CodeGenCXX/dllexport.cpp
+++ b/test/CodeGenCXX/dllexport.cpp
@@ -486,7 +486,7 @@ struct S {
 
 struct CtorWithClosure {
   __declspec(dllexport) CtorWithClosure(...) {}
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FCtorWithClosure@@QAEXXZ"
+// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FCtorWithClosure@@QAEXXZ"({{.*}}) comdat
 // M32-DAG:   %[[this_addr:.*]] = alloca %struct.CtorWithClosure*, align 4
 // M32-DAG:   store %struct.CtorWithClosure* %this, %struct.CtorWithClosure** %[[this_addr]], align 4
 // M32-DAG:   %[[this:.*]] = load %struct.CtorWithClosure*, %struct.CtorWithClosure** %[[this_addr]]
@@ -503,7 +503,7 @@ struct CtorWithClosure {
 struct __declspec(dllexport) ClassWithClosure {
   DELETE_IMPLICIT_MEMBERS(ClassWithClosure);
   ClassWithClosure(...) {}
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FClassWithClosure@@QAEXXZ"
+// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FClassWithClosure@@QAEXXZ"({{.*}}) comdat
 // M32-DAG:   %[[this_addr:.*]] = alloca %struct.ClassWithClosure*, align 4
 // M32-DAG:   store %struct.ClassWithClosure* %this, %struct.ClassWithClosure** %[[this_addr]], align 4
 // M32-DAG:   %[[this:.*]] = load %struct.ClassWithClosure*, %struct.ClassWithClosure** %[[this_addr]]
@@ -520,8 +520,8 @@ struct __declspec(dllexport) NestedOuter {
   };
 };
 
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FNestedOuter@@QAEXXZ"
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FNestedInner@NestedOuter@@QAEXXZ"
+// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FNestedOuter@@QAEXXZ"({{.*}}) comdat
+// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FNestedInner@NestedOuter@@QAEXXZ"({{.*}}) comdat
 
 template <typename T>
 struct SomeTemplate {
@@ -530,7 +530,7 @@ struct SomeTemplate {
 };
 struct __declspec(dllexport) InheritFromTemplate : SomeTemplate<int> {};
 
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_F?$SomeTemplate@H@@QAEXXZ"
+// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_F?$SomeTemplate@H@@QAEXXZ"({{.*}}) comdat
 
 namespace PR23801 {
 template <typename>
@@ -546,7 +546,7 @@ struct __declspec(dllexport) B {
 };
 }
 //
-// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FB@PR23801@@QAEXXZ"
+// M32-DAG: define weak_odr dllexport x86_thiscallcc void @"\01??_FB@PR23801@@QAEXXZ"({{.*}}) comdat
 
 struct __declspec(dllexport) T {
   // Copy assignment operator:
diff --git a/test/CodeGenCXX/dynamic_cast-no-rtti.cpp b/test/CodeGenCXX/dynamic_cast-no-rtti.cpp
index cde03a3..58834a5 100644
--- a/test/CodeGenCXX/dynamic_cast-no-rtti.cpp
+++ b/test/CodeGenCXX/dynamic_cast-no-rtti.cpp
@@ -13,14 +13,14 @@ struct B : public A {
 // does not use runtime support.
 A *upcast(B *b) {
   return dynamic_cast<A *>(b);
-// CHECK-LABEL: define %struct.A* @_Z6upcastP1B
-// CHECK-NOT: call i8* @__dynamic_cast
+// CHECK-LABEL: define {{.*}}%struct.A* @_Z6upcastP1B
+// CHECK-NOT: call {{.*}}i8* @__dynamic_cast
 }
 
 // A NoOp dynamic_cast can be used with -fno-rtti iff it does not use
 // runtime support.
 B *samecast(B *b) {
   return dynamic_cast<B *>(b);
-// CHECK-LABEL: define %struct.B* @_Z8samecastP1B
-// CHECK-NOT: call i8* @__dynamic_cast
+// CHECK-LABEL: define {{.*}}%struct.B* @_Z8samecastP1B
+// CHECK-NOT: call {{.*}}i8* @__dynamic_cast
 }
diff --git a/test/CodeGenCXX/extern-c.cpp b/test/CodeGenCXX/extern-c.cpp
index 7852644..e68738b 100644
--- a/test/CodeGenCXX/extern-c.cpp
+++ b/test/CodeGenCXX/extern-c.cpp
@@ -71,5 +71,5 @@ namespace PR19411 {
   struct A { void f(); };
   extern "C" void A::f() { void g(); g(); }
   // CHECK-LABEL: @_ZN7PR194111A1fEv(
-  // CHECK: call void @g()
+  // CHECK: call {{.*}}void @g()
 }
diff --git a/test/CodeGenCXX/function-template-explicit-specialization.cpp b/test/CodeGenCXX/function-template-explicit-specialization.cpp
index 8ff0655..ed6cf84c 100644
--- a/test/CodeGenCXX/function-template-explicit-specialization.cpp
+++ b/test/CodeGenCXX/function-template-explicit-specialization.cpp
@@ -3,11 +3,11 @@
 template<typename T> void a(T);
 template<> void a(int) {}
 
-// CHECK-LABEL: define void @_Z1aIiEvT_
+// CHECK-LABEL: define {{.*}}void @_Z1aIiEvT_
 
 namespace X {
 template<typename T> void b(T);
 template<> void b(int) {}
 }
 
-// CHECK-LABEL: define void @_ZN1X1bIiEEvT_
+// CHECK-LABEL: define {{.*}}void @_ZN1X1bIiEEvT_
diff --git a/test/CodeGenCXX/globalinit-loc.cpp b/test/CodeGenCXX/globalinit-loc.cpp
index 813a890..2712052 100644
--- a/test/CodeGenCXX/globalinit-loc.cpp
+++ b/test/CodeGenCXX/globalinit-loc.cpp
@@ -4,7 +4,7 @@
 // Verify that the global init helper function does not get associated
 // with any source location.
 //
-// CHECK: define internal void @_GLOBAL__sub_I_globalinit_loc.cpp
+// CHECK: define internal {{.*}}void @_GLOBAL__sub_I_globalinit_loc.cpp
 // CHECK: !dbg ![[DBG:.*]]
 // CHECK: !DISubprogram(linkageName: "_GLOBAL__sub_I_globalinit_loc.cpp"
 // CHECK-NOT:           line:
diff --git a/test/CodeGenCXX/inline-dllexport-member.cpp b/test/CodeGenCXX/inline-dllexport-member.cpp
index af9a536..4bc1d4c 100644
--- a/test/CodeGenCXX/inline-dllexport-member.cpp
+++ b/test/CodeGenCXX/inline-dllexport-member.cpp
@@ -5,7 +5,7 @@ struct __declspec(dllexport) s {
   static const unsigned int ui = 0;
 };
 
-// CHECK: ![[SCOPE:[0-9]+]] = !DICompileUnit(
+// CHECK: ![[SCOPE:[0-9]+]] = distinct !DICompileUnit(
 // CHECK: !DIGlobalVariable(name: "ui", linkageName: "_ZN1s2uiE", scope: ![[SCOPE]],
 // CHECK-SAME:              variable: i32* @_ZN1s2uiE
 
diff --git a/test/CodeGenCXX/linetable-virtual-variadic.cpp b/test/CodeGenCXX/linetable-virtual-variadic.cpp
index c16c5e3..115f1ae 100644
--- a/test/CodeGenCXX/linetable-virtual-variadic.cpp
+++ b/test/CodeGenCXX/linetable-virtual-variadic.cpp
@@ -17,7 +17,7 @@ void Derived::VariadicFunction(...) { }
 //
 // CHECK: !llvm.dbg.cu = !{![[CU:[0-9]+]]}
 //
-// CHECK: ![[CU]] = !DICompileUnit({{.*}} subprograms: ![[SPs:[0-9]+]]
+// CHECK: ![[CU]] = distinct !DICompileUnit({{.*}} subprograms: ![[SPs:[0-9]+]]
 // CHECK: ![[SPs]] = !{![[SP:[0-9]+]]}
 // CHECK: ![[SP]] = !DISubprogram(name: "VariadicFunction",{{.*}} function: {{[^:]+}} @_ZN7Derived16VariadicFunctionEz
 // CHECK: ![[LOC]] = !DILocation({{.*}}scope: ![[SP]])
diff --git a/test/CodeGenCXX/mangle-address-space.cpp b/test/CodeGenCXX/mangle-address-space.cpp
index a0b3c1a..f18480d 100644
--- a/test/CodeGenCXX/mangle-address-space.cpp
+++ b/test/CodeGenCXX/mangle-address-space.cpp
@@ -1,12 +1,12 @@
 // RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple -o - %s | FileCheck %s
 
-// CHECK-LABEL: define void @_Z2f0Pc
+// CHECK-LABEL: define {{.*}}void @_Z2f0Pc
 void f0(char *p) { }
-// CHECK-LABEL: define void @_Z2f0PU3AS1c
+// CHECK-LABEL: define {{.*}}void @_Z2f0PU3AS1c
 void f0(char __attribute__((address_space(1))) *p) { }
 
 struct OpaqueType;
 typedef OpaqueType __attribute__((address_space(100))) * OpaqueTypePtr;
 
-// CHECK-LABEL: define void @_Z2f0PU5AS10010OpaqueType
+// CHECK-LABEL: define {{.*}}void @_Z2f0PU5AS10010OpaqueType
 void f0(OpaqueTypePtr) { }
diff --git a/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp b/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp
index e2cbe15..b01d609 100644
--- a/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp
+++ b/test/CodeGenCXX/mangle-ms-templates-memptrs-2.cpp
@@ -3,22 +3,37 @@
 template <typename T, int (T::*)() = nullptr>
 struct J {};
 
+template <typename T, int T::* = nullptr>
+struct K {};
+
 struct __single_inheritance M;
 J<M> m;
 // CHECK-DAG: @"\01?m@@3U?$J@UM@@$0A@@@A"
 
+K<M> m2;
+// CHECK-DAG: @"\01?m2@@3U?$K@UM@@$0?0@@A"
+
 struct __multiple_inheritance N;
 J<N> n;
 // CHECK-DAG: @"\01?n@@3U?$J@UN@@$HA@@@A"
 
+K<N> n2;
+// CHECK-DAG: @"\01?n2@@3U?$K@UN@@$0?0@@A"
+
 struct __virtual_inheritance O;
 J<O> o;
 // CHECK-DAG: @"\01?o@@3U?$J@UO@@$IA@A@@@A"
 
+K<O> o2;
+// CHECK-DAG: @"\01?o2@@3U?$K@UO@@$FA@?0@@A"
+
 struct P;
 J<P> p;
 // CHECK-DAG: @"\01?p@@3U?$J@UP@@$JA@A@?0@@A"
 
+K<P> p2;
+// CHECK-DAG: @"\01?p2@@3U?$K@UP@@$GA@A@?0@@A"
+
 #pragma pointers_to_members(full_generality, virtual_inheritance)
 
 struct S {
diff --git a/test/CodeGenCXX/mangle-ms-templates-memptrs.cpp b/test/CodeGenCXX/mangle-ms-templates-memptrs.cpp
index 803cac3..e710abe 100644
--- a/test/CodeGenCXX/mangle-ms-templates-memptrs.cpp
+++ b/test/CodeGenCXX/mangle-ms-templates-memptrs.cpp
@@ -69,8 +69,8 @@ void ReadFields() {
 // them right in class templates.
 // CHECK: call {{.*}} @"\01??$ReadField@US@@$0A@@@YAHAAUS@@@Z"
 // CHECK: call {{.*}} @"\01??$ReadField@UM@@$0A@@@YAHAAUM@@@Z"
-// CHECK: call {{.*}} @"\01??$ReadField@UV@@$FA@?0@@YAHAAUV@@@Z"
-// CHECK: call {{.*}} @"\01??$ReadField@UU@@$GA@A@?0@@YAHAAUU@@@Z"
+// CHECK: call {{.*}} @"\01??$ReadField@UV@@$0A@@@YAHAAUV@@@Z"
+// CHECK: call {{.*}} @"\01??$ReadField@UU@@$0A@@@YAHAAUU@@@Z"
 
 // Non-polymorphic null data memptr vs first field memptr.  MSVC mangles these
 // the same.
@@ -141,3 +141,15 @@ void CallMethods() {
 // CHECK: call {{.*}} @"\01??$CallMethod@UM@@$0A@@@YAXAAUM@@@Z"
 // CHECK: call {{.*}} @"\01??$CallMethod@UV@@$0A@@@YAXAAUV@@@Z"
 // CHECK: call {{.*}} @"\01??$CallMethod@UU@@$0A@@@YAXAAUU@@@Z"
+
+namespace NegativeNVOffset {
+struct A {};
+struct B : virtual A {};
+struct C : B {
+  virtual void f();
+};
+}
+
+template void CallMethod<NegativeNVOffset::C, &NegativeNVOffset::C::f>(NegativeNVOffset::C &);
+
+// CHECK-LABEL: define {{.*}} @"\01??$CallMethod@UC@NegativeNVOffset@@$I??_912@$BA@AEPPPPPPPM@A@@@YAXAAUC@NegativeNVOffset@@@Z"
diff --git a/test/CodeGenCXX/mangle-nullptr-arg.cpp b/test/CodeGenCXX/mangle-nullptr-arg.cpp
index 66ed7e5..e4ae353 100644
--- a/test/CodeGenCXX/mangle-nullptr-arg.cpp
+++ b/test/CodeGenCXX/mangle-nullptr-arg.cpp
@@ -2,15 +2,15 @@
 
 template<int *ip> struct IP {};
 
-// CHECK-LABEL: define void @_Z5test12IPILPi0EE
+// CHECK-LABEL: define {{.*}}void @_Z5test12IPILPi0EE
 void test1(IP<nullptr>) {}
 
 struct X{ };
 template<int X::*pm> struct PM {};
 
-// CHECK-LABEL: define void @_Z5test22PMILM1Xi0EE
+// CHECK-LABEL: define {{.*}}void @_Z5test22PMILM1Xi0EE
 void test2(PM<nullptr>) { }
 
-// CHECK-LABEL: define void @_Z5test316DependentTypePtrIPiLS0_0EE
+// CHECK-LABEL: define {{.*}}void @_Z5test316DependentTypePtrIPiLS0_0EE
 template<typename T, T x> struct DependentTypePtr {};
 void test3(DependentTypePtr<int*,nullptr>) { }
diff --git a/test/CodeGenCXX/mangle-template.cpp b/test/CodeGenCXX/mangle-template.cpp
index aaae4b2..7fa300a 100644
--- a/test/CodeGenCXX/mangle-template.cpp
+++ b/test/CodeGenCXX/mangle-template.cpp
@@ -98,7 +98,7 @@ namespace test8 {
   template<typename T>
   void f(int_c<meta<T>::type::value>) { }
 
-  // CHECK-LABEL: define weak_odr void @_ZN5test81fIiEEvNS_5int_cIXsr4metaIT_E4typeE5valueEEE(
+  // CHECK-LABEL: define weak_odr {{.*}}void @_ZN5test81fIiEEvNS_5int_cIXsr4metaIT_E4typeE5valueEEE(
   template void f<int>(int_c<sizeof(int)>);
 }
 
@@ -157,13 +157,13 @@ namespace test12 {
   const int n = 10;
   template<typename T, T v> void test() {}
   void use() {
-    // CHECK-LABEL: define internal void @_ZN6test124testIFivEXadL_ZNS_L1fEvEEEEvv(
+    // CHECK-LABEL: define internal {{.*}}void @_ZN6test124testIFivEXadL_ZNS_L1fEvEEEEvv(
     test<int(), &f>();
-    // CHECK-LABEL: define internal void @_ZN6test124testIRFivEL_ZNS_L1fEvEEEvv(
+    // CHECK-LABEL: define internal {{.*}}void @_ZN6test124testIRFivEL_ZNS_L1fEvEEEvv(
     test<int(&)(), f>();
-    // CHECK-LABEL: define internal void @_ZN6test124testIPKiXadL_ZNS_L1nEEEEEvv(
+    // CHECK-LABEL: define internal {{.*}}void @_ZN6test124testIPKiXadL_ZNS_L1nEEEEEvv(
     test<const int*, &n>();
-    // CHECK-LABEL: define internal void @_ZN6test124testIRKiL_ZNS_L1nEEEEvv(
+    // CHECK-LABEL: define internal {{.*}}void @_ZN6test124testIRKiL_ZNS_L1nEEEEvv(
     test<const int&, n>();
   }
 }
diff --git a/test/CodeGenCXX/microsoft-abi-array-cookies.cpp b/test/CodeGenCXX/microsoft-abi-array-cookies.cpp
index 619b1b8..62ead4f 100644
--- a/test/CodeGenCXX/microsoft-abi-array-cookies.cpp
+++ b/test/CodeGenCXX/microsoft-abi-array-cookies.cpp
@@ -58,4 +58,14 @@ void check_array_cookies_aligned() {
 // CHECK: getelementptr inbounds i8, i8* [[ARRAY_AS_CHAR]], i64 -8
 }
 
+namespace PR23990 {
+struct S {
+  char x[42];
+  void operator delete[](void *p, __SIZE_TYPE__);
+  // CHECK-LABEL: define void @"\01?delete_s@PR23990@@YAXPAUS@1@@Z"(
+  // CHECK: call void @"\01??_VS@PR23990@@SAXPAXI@Z"(i8* {{.*}}, i32 42)
+};
+void delete_s(S *s) { delete[] s; }
+}
+
 // CHECK: attributes [[NUW]] = { nounwind{{.*}} }
diff --git a/test/CodeGenCXX/microsoft-abi-member-pointers.cpp b/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
index 8cb4a1f..a509d57 100755
--- a/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
+++ b/test/CodeGenCXX/microsoft-abi-member-pointers.cpp
@@ -541,9 +541,13 @@ void (D::*convertCToD(void (C::*mp)()))() {
 //
 //        memptr.convert:                                   ; preds = %entry
 // CHECK:   extractvalue { i8*, i32, i32 } %{{.*}}, 0
-// CHECK:   extractvalue { i8*, i32, i32 } %{{.*}}, 1
-// CHECK:   extractvalue { i8*, i32, i32 } %{{.*}}, 2
-// CHECK:   %[[adj:.*]] = add nsw i32 %{{.*}}, 4
+// CHECK:   %[[nvoff:.*]] = extractvalue { i8*, i32, i32 } %{{.*}}, 1
+// CHECK:   %[[vbidx:.*]] = extractvalue { i8*, i32, i32 } %{{.*}}, 2
+// CHECK:   %[[is_nvbase:.*]] = icmp eq i32 %[[vbidx]], 0
+// CHECK:   %[[nv_disp:.*]] = add nsw i32 %[[nvoff]], 4
+// CHECK:   %[[nv_adj:.*]] = select i1 %[[is_nvbase]], i32 %[[nv_disp]], i32 0
+// CHECK:   %[[dst_adj:.*]] = select i1 %[[is_nvbase]], i32 4, i32 0
+// CHECK:   %[[adj:.*]] = sub nsw i32 %[[nv_adj]], %[[dst_adj]]
 // CHECK:   insertvalue { i8*, i32, i32 } undef, i8* {{.*}}, 0
 // CHECK:   insertvalue { i8*, i32, i32 } {{.*}}, i32 %[[adj]], 1
 // CHECK:   insertvalue { i8*, i32, i32 } {{.*}}, i32 {{.*}}, 2
@@ -712,3 +716,16 @@ int foo(A *a, int A::*mp) {
 }
 #endif
 #endif
+
+namespace pr23878 {
+struct A { virtual void g(); };
+struct B { virtual void f(); };
+struct C : virtual B { void f(); };
+struct D : A, C {};
+
+typedef void (D::*DMemPtrTy)();
+
+// CHECK-LABEL: define void @"\01?get_memptr@pr23878@@YAP8D@1@AEXXZXZ"
+// CHECK: @"\01??_9C@pr23878@@$BA@AE" to i8*), i32 0, i32 4
+DMemPtrTy get_memptr() { return &D::f; }
+}
diff --git a/test/CodeGenCXX/microsoft-abi-thunks.cpp b/test/CodeGenCXX/microsoft-abi-thunks.cpp
index 8cbea5c..8230334 100644
--- a/test/CodeGenCXX/microsoft-abi-thunks.cpp
+++ b/test/CodeGenCXX/microsoft-abi-thunks.cpp
@@ -91,7 +91,7 @@ struct E : D {
 
 E::E() {}  // Emits vftable and forces thunk generation.
 
-// CODEGEN-LABEL: define weak_odr x86_thiscallcc %struct.C* @"\01?goo@E@@QAEPAUB@@XZ"
+// CODEGEN-LABEL: define weak_odr x86_thiscallcc %struct.C* @"\01?goo@E@@QAEPAUB@@XZ"{{.*}} comdat
 // CODEGEN:   call x86_thiscallcc %struct.C* @"\01?goo@E@@UAEPAUC@@XZ"
 // CODEGEN:   getelementptr inbounds i8, i8* {{.*}}, i32 4
 // CODEGEN: ret
@@ -124,7 +124,7 @@ struct I : D {
 
 I::I() {}  // Emits vftable and forces thunk generation.
 
-// CODEGEN-LABEL: define weak_odr x86_thiscallcc %struct.{{[BF]}}* @"\01?goo@I@@QAEPAUB@@XZ"
+// CODEGEN-LABEL: define weak_odr x86_thiscallcc %struct.{{[BF]}}* @"\01?goo@I@@QAEPAUB@@XZ"{{.*}} comdat
 // CODEGEN: %[[ORIG_RET:.*]] = call x86_thiscallcc %struct.F* @"\01?goo@I@@UAEPAUF@@XZ"
 // CODEGEN: %[[ORIG_RET_i8:.*]] = bitcast %struct.F* %[[ORIG_RET]] to i8*
 // CODEGEN: %[[VBPTR_i8:.*]] = getelementptr inbounds i8, i8* %[[ORIG_RET_i8]], i32 4
diff --git a/test/CodeGenCXX/pr11797.cpp b/test/CodeGenCXX/pr11797.cpp
index 2a31090..3767b1d 100644
--- a/test/CodeGenCXX/pr11797.cpp
+++ b/test/CodeGenCXX/pr11797.cpp
@@ -5,4 +5,4 @@ namespace std __attribute__ ((__visibility__ ("default"))) {}
 void foo() {
 }
 #pragma GCC visibility pop
-// CHECK-LABEL: define void @_Z3foov()
+// CHECK-LABEL: define {{.*}}void @_Z3foov()
diff --git a/test/CodeGenCXX/pr18661.cpp b/test/CodeGenCXX/pr18661.cpp
index 2358678..65ffd6f 100644
--- a/test/CodeGenCXX/pr18661.cpp
+++ b/test/CodeGenCXX/pr18661.cpp
@@ -11,4 +11,4 @@ extern "C" {
 // PR18661: Clang would fail to emit function definition with mismatching
 // exception specification, even though it was just treated as a warning.
 
-// CHECK: define void @f()
+// CHECK: define {{.*}}void @f()
diff --git a/test/CodeGenCXX/pr9965.cpp b/test/CodeGenCXX/pr9965.cpp
index 46fd609..95ba2be 100644
--- a/test/CodeGenCXX/pr9965.cpp
+++ b/test/CodeGenCXX/pr9965.cpp
@@ -8,7 +8,7 @@ struct X : A // default constructor is not trivial
 };
 
 X<int> x;
-// CHECK-LABEL: define internal void @__cxx_global_var_init()
+// CHECK-LABEL: define internal {{.*}}void @__cxx_global_var_init()
 // CHECK: call {{.*}} @_ZN1XIiEC1Ev
 // CHECK: define linkonce_odr {{.*}} @_ZN1XIiEC1Ev
 // CHECK: define linkonce_odr {{.*}} @_ZN1XIiEC2Ev
diff --git a/test/CodeGenCXX/pragma-weak.cpp b/test/CodeGenCXX/pragma-weak.cpp
index e2d4648..caab266 100644
--- a/test/CodeGenCXX/pragma-weak.cpp
+++ b/test/CodeGenCXX/pragma-weak.cpp
@@ -14,18 +14,18 @@ void S::foo() {}
 
 #pragma weak zed
 namespace bar {  void zed() {} }
-// CHECK-LABEL: define void @_ZN3bar3zedEv(
+// CHECK-LABEL: define {{.*}}void @_ZN3bar3zedEv(
 
 #pragma weak bah
 void bah() {}
-// CHECK-LABEL: define void @_Z3bahv(
+// CHECK-LABEL: define {{.*}}void @_Z3bahv(
 
 #pragma weak baz
 extern "C" void baz() {}
-// CHECK-LABEL: define weak void @baz(
+// CHECK-LABEL: define weak {{.*}}void @baz(
 
 #pragma weak _Z3baxv
 void bax() {}
 // GCC produces a weak symbol for this one, but it doesn't look like a good
 // idea to expose the mangling to the pragma unless we really have to.
-// CHECK-LABEL: define void @_Z3baxv(
+// CHECK-LABEL: define {{.*}}void @_Z3baxv(
diff --git a/test/CodeGenCXX/predefined-expr.cpp b/test/CodeGenCXX/predefined-expr.cpp
index 4c0a886..21ccedd 100644
--- a/test/CodeGenCXX/predefined-expr.cpp
+++ b/test/CodeGenCXX/predefined-expr.cpp
@@ -559,11 +559,11 @@ XXX::XXX()
    _dispatch_once(^{ notify_register_dispatch( ^(int token) { XXLog(__FUNCTION__); }); 
    });
 }
-// CHECK: define internal void @___ZN3XXXC2Ev_block_invoke_
+// CHECK: define internal {{.*}}void @___ZN3XXXC2Ev_block_invoke_
 
 XXX::~XXX()
 {
    _dispatch_once(^{ notify_register_dispatch( ^(int token) { XXLog(__FUNCTION__); }); 
    });
 }
-// CHECK: define internal void @___ZN3XXXD2Ev_block_invoke_
+// CHECK: define internal {{.*}}void @___ZN3XXXD2Ev_block_invoke_
diff --git a/test/CodeGenCXX/redefine_extname.cpp b/test/CodeGenCXX/redefine_extname.cpp
index 2b6b703..f76fe62 100644
--- a/test/CodeGenCXX/redefine_extname.cpp
+++ b/test/CodeGenCXX/redefine_extname.cpp
@@ -8,7 +8,7 @@ extern "C" {
   int statvfs64(struct statvfs64 *);
 }
 
-void foo() {
+void some_func() {
   struct statvfs64 st;
   statvfs64(&st);
 // Check that even if there is a structure with redefined name before the
@@ -16,3 +16,15 @@ void foo() {
 // CHECK:  call i32 @statvfs(%struct.statvfs64* %st)
 }
 
+// This is a case when redefenition is deferred *and* we have a local of the
+// same name. PR23923.
+#pragma redefine_extname foo bar
+int f() {
+  int foo = 0;
+  return foo;
+}
+extern "C" {
+  int foo() { return 1; }
+// CHECK: define i32 @bar()
+}
+
diff --git a/test/CodeGenCXX/template-dependent-bind-temporary.cpp b/test/CodeGenCXX/template-dependent-bind-temporary.cpp
index 47d8279..4c4b3ea 100644
--- a/test/CodeGenCXX/template-dependent-bind-temporary.cpp
+++ b/test/CodeGenCXX/template-dependent-bind-temporary.cpp
@@ -18,7 +18,7 @@ void IntToString(T a)
 }
 
 int main() {
-// CHECK-LABEL: define linkonce_odr void @_Z11IntToStringIcEvT_(
+// CHECK-LABEL: define linkonce_odr {{.*}}void @_Z11IntToStringIcEvT_(
   IntToString('a');
 }
 
diff --git a/test/CodeGenCXX/thunks.cpp b/test/CodeGenCXX/thunks.cpp
index 2287d65..0e97255 100644
--- a/test/CodeGenCXX/thunks.cpp
+++ b/test/CodeGenCXX/thunks.cpp
@@ -365,6 +365,7 @@ namespace Test15 {
 
 // This is from Test5:
 // CHECK-LABEL: define internal void @_ZThn8_N6Test4B12_GLOBAL__N_11C1fEv(
+// CHECK-NOT: comdat
 // CHECK-LABEL: define linkonce_odr void @_ZTv0_n24_N5Test51B1fEv
 
 // This is from Test10:
diff --git a/test/CodeGenCXX/trap-fnattr.cpp b/test/CodeGenCXX/trap-fnattr.cpp
new file mode 100644
index 0000000..b73ea43
--- /dev/null
+++ b/test/CodeGenCXX/trap-fnattr.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -O0 -emit-llvm -ftrapv -ftrap-function=mytrap %s -o - | FileCheck %s -check-prefix=TRAPFUNC
+// RUN: %clang_cc1 -O0 -emit-llvm -ftrapv %s -o - | FileCheck %s -check-prefix=NOOPTION
+
+// TRAPFUNC-LABEL: define void @{{_Z12test_builtinv|\"\\01\?test_builtin@@YAXXZ\"}}
+// TRAPFUNC: call void @llvm.trap() [[ATTR0:#[0-9]+]]
+
+// NOOPTION-LABEL: define void @{{_Z12test_builtinv|\"\\01\?test_builtin@@YAXXZ\"}}
+// NOOPTION: call void @llvm.trap(){{$}}
+
+void test_builtin(void) {
+  __builtin_trap();
+}
+
+// TRAPFUNC-LABEL: define {{.*}}i32 @{{_Z13test_noreturnv|\"\\01\?test_noreturn@@YAHXZ\"}}
+// TRAPFUNC: call void @llvm.trap() [[ATTR0]]
+
+// NOOPTION-LABEL: define {{.*}}i32 @{{_Z13test_noreturnv|\"\\01\?test_noreturn@@YAHXZ\"}}
+// NOOPTION: call void @llvm.trap(){{$}}
+
+int test_noreturn(void) {
+}
+
+// TRAPFUNC-LABEL: define {{.*}}i32 @{{_Z17test_add_overflowii|\"\\01\?test_add_overflow@@YAHHH@Z\"}}
+// TRAPFUNC: call void @llvm.trap() [[ATTR1:#[0-9]+]]
+
+// NOOPTION-LABEL: define {{.*}}i32 @{{_Z17test_add_overflowii|\"\\01\?test_add_overflow@@YAHHH@Z\"}}
+// NOOPTION: call void @llvm.trap() [[ATTR2:#[0-9]+]]
+
+int test_add_overflow(int a, int b) {
+  return a + b;
+}
+
+// TRAPFUNC: attributes [[ATTR0]] = { {{.*}}"trap-func-name"="mytrap" }
+// TRAPFUNC: attributes [[ATTR1]] = { {{.*}}"trap-func-name"="mytrap" }
+
+// NOOPTION-NOT: attributes [[ATTR2]] = { {{.*}}"trap-func-name"="mytrap" }
diff --git a/test/CodeGenCXX/typeid-should-throw.cpp b/test/CodeGenCXX/typeid-should-throw.cpp
index 1d8fc85..428c737 100644
--- a/test/CodeGenCXX/typeid-should-throw.cpp
+++ b/test/CodeGenCXX/typeid-should-throw.cpp
@@ -10,73 +10,73 @@ struct A {
 struct B : A {};
 
 void f1(A *x) { typeid(false, *x); }
-// CHECK-LABEL: define void @_Z2f1P1A
+// CHECK-LABEL: define {{.*}}void @_Z2f1P1A
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f2(bool b, A *x, A *y) { typeid(b ? *x : *y); }
-// CHECK-LABEL: define void @_Z2f2bP1AS0_
+// CHECK-LABEL: define {{.*}}void @_Z2f2bP1AS0_
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f3(bool b, A *x, A &y) { typeid(b ? *x : y); }
-// CHECK-LABEL: define void @_Z2f3bP1ARS_
+// CHECK-LABEL: define {{.*}}void @_Z2f3bP1ARS_
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f4(bool b, A &x, A *y) { typeid(b ? x : *y); }
-// CHECK-LABEL: define void @_Z2f4bR1APS_
+// CHECK-LABEL: define {{.*}}void @_Z2f4bR1APS_
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f5(volatile A *x) { typeid(*x); }
-// CHECK-LABEL: define void @_Z2f5PV1A
+// CHECK-LABEL: define {{.*}}void @_Z2f5PV1A
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f6(A *x) { typeid((B &)*(B *)x); }
-// CHECK-LABEL: define void @_Z2f6P1A
+// CHECK-LABEL: define {{.*}}void @_Z2f6P1A
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f7(A *x) { typeid((*x)); }
-// CHECK-LABEL: define void @_Z2f7P1A
+// CHECK-LABEL: define {{.*}}void @_Z2f7P1A
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f8(A *x) { typeid(x[0]); }
-// CHECK-LABEL: define void @_Z2f8P1A
+// CHECK-LABEL: define {{.*}}void @_Z2f8P1A
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f9(A *x) { typeid(0[x]); }
-// CHECK-LABEL: define void @_Z2f9P1A
+// CHECK-LABEL: define {{.*}}void @_Z2f9P1A
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f10(A *x, A *y) { typeid(*y ?: *x); }
-// CHECK-LABEL: define void @_Z3f10P1AS0_
+// CHECK-LABEL: define {{.*}}void @_Z3f10P1AS0_
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f11(A *x, A &y) { typeid(*x ?: y); }
-// CHECK-LABEL: define void @_Z3f11P1ARS_
+// CHECK-LABEL: define {{.*}}void @_Z3f11P1ARS_
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f12(A &x, A *y) { typeid(x ?: *y); }
-// CHECK-LABEL: define void @_Z3f12R1APS_
+// CHECK-LABEL: define {{.*}}void @_Z3f12R1APS_
 // CHECK:       icmp eq {{.*}}, null
 // CHECK-NEXT:  br i1
 
 void f13(A &x, A &y) { typeid(x ?: y); }
-// CHECK-LABEL: define void @_Z3f13R1AS0_
+// CHECK-LABEL: define {{.*}}void @_Z3f13R1AS0_
 // CHECK-NOT:   icmp eq {{.*}}, null
 
 void f14(A *x) { typeid((const A &)(A)*x); }
-// CHECK-LABEL: define void @_Z3f14P1A
+// CHECK-LABEL: define {{.*}}void @_Z3f14P1A
 // CHECK-NOT:   icmp eq {{.*}}, null
 
 void f15(A *x) { typeid((A &&)*(A *)nullptr); }
-// CHECK-LABEL: define void @_Z3f15P1A
+// CHECK-LABEL: define {{.*}}void @_Z3f15P1A
 // CHECK-NOT:   icmp eq {{.*}}, null
diff --git a/test/CodeGenCXX/vararg-non-pod.cpp b/test/CodeGenCXX/vararg-non-pod.cpp
index 613b28c..36891a4 100644
--- a/test/CodeGenCXX/vararg-non-pod.cpp
+++ b/test/CodeGenCXX/vararg-non-pod.cpp
@@ -8,7 +8,7 @@ struct X {
 
 void vararg(...);
 
-// CHECK-LABEL: define void @_Z4test1X
+// CHECK-LABEL: define {{.*}}void @_Z4test1X
 void test(X x) {
   // CHECK: call void @llvm.trap()
   vararg(x);
diff --git a/test/CodeGenCXX/virtual-destructor-synthesis.cpp b/test/CodeGenCXX/virtual-destructor-synthesis.cpp
index 80d1b1e..5927235 100644
--- a/test/CodeGenCXX/virtual-destructor-synthesis.cpp
+++ b/test/CodeGenCXX/virtual-destructor-synthesis.cpp
@@ -12,5 +12,5 @@ pile_box::pile_box(box *pp)
 {
 }
 
-// CHECK: call void @_ZdlPv
+// CHECK: call {{.*}}void @_ZdlPv
 
diff --git a/test/CodeGenCXX/vla-lambda-capturing.cpp b/test/CodeGenCXX/vla-lambda-capturing.cpp
index f2332bf..44b6a25 100644
--- a/test/CodeGenCXX/vla-lambda-capturing.cpp
+++ b/test/CodeGenCXX/vla-lambda-capturing.cpp
@@ -12,7 +12,7 @@ typedef __INTPTR_TYPE__ intptr_t;
 // CHECK-DAG:   [[CAP_TYPE3:%.+]] = type { [[INTPTR_T]]*, [[INTPTR_T]], [[INTPTR_T]], [[INTPTR_T]]*, [[INTPTR_T]]* }
 // CHECK-DAG:   [[CAP_TYPE4:%.+]] = type { [[INTPTR_T]]*, [[INTPTR_T]], [[INTPTR_T]]*, [[INTPTR_T]], [[INTPTR_T]]* }
 
-// CHECK:       define void [[G:@.+]](
+// CHECK:       define {{.*}}void [[G:@.+]](
 // CHECK:       [[N_ADDR:%.+]] = alloca [[INTPTR_T]]
 // CHECK:       store [[INTPTR_T]] %{{.+}}, [[INTPTR_T]]* [[N_ADDR]]
 // CHECK:       [[N_VAL:%.+]] = load [[INTPTR_T]], [[INTPTR_T]]* [[N_ADDR]]
@@ -22,7 +22,7 @@ typedef __INTPTR_TYPE__ intptr_t;
 // CHECK:       store [[INTPTR_T]]* %{{.+}}, [[INTPTR_T]]** [[CAP_BUFFER_ADDR]]
 // CHECK:       [[CAP_N_REF:%.+]] = getelementptr inbounds [[CAP_TYPE1]], [[CAP_TYPE1]]* [[CAP_ARG:%.+]], i{{.+}} 0, i{{.+}} 2
 // CHECK:       store [[INTPTR_T]]* [[N_ADDR]], [[INTPTR_T]]** [[CAP_N_REF]]
-// CHECK:       call{{( x86_thiscallcc)?}} void [[G_LAMBDA:@.+]]([[CAP_TYPE1]]* [[CAP_ARG]])
+// CHECK:       call{{.*}} void [[G_LAMBDA:@.+]]([[CAP_TYPE1]]* [[CAP_ARG]])
 // CHECK:       ret void
 void g(intptr_t n) {
   intptr_t buffer[n];
@@ -70,17 +70,17 @@ void b(intptr_t n, T arg) {
 
 // CHECK-LABEL: @main
 int main() {
-  // CHECK:       call void [[G]]([[INTPTR_T]] [[INTPTR_T_ATTR:(signext )?]]1)
+  // CHECK:       call {{.*}}void [[G]]([[INTPTR_T]] [[INTPTR_T_ATTR:(signext )?]]1)
   g((intptr_t)1);
-  // CHECK:       call void [[F_INT:@.+]]([[INTPTR_T]] [[INTPTR_T_ATTR]]1, [[INTPTR_T]] [[INTPTR_T_ATTR]]2)
+  // CHECK:       call {{.*}}void [[F_INT:@.+]]([[INTPTR_T]] [[INTPTR_T_ATTR]]1, [[INTPTR_T]] [[INTPTR_T_ATTR]]2)
   f((intptr_t)1, (intptr_t)2);
-  // CHECK:       call void [[B_INT:@.+]]([[INTPTR_T]] [[INTPTR_T_ATTR]]12, [[INTPTR_T]] [[INTPTR_T_ATTR]]13)
+  // CHECK:       call {{.*}}void [[B_INT:@.+]]([[INTPTR_T]] [[INTPTR_T_ATTR]]12, [[INTPTR_T]] [[INTPTR_T_ATTR]]13)
   b((intptr_t)12, (intptr_t)13);
   // CHECK:       ret i32 0
   return 0;
 }
 
-// CHECK: define linkonce_odr void [[F_INT]]([[INTPTR_T]]
+// CHECK: define linkonce_odr {{.*}}void [[F_INT]]([[INTPTR_T]]
 // CHECK: [[SIZE:%.+]] = add
 // CHECK: call i{{.+}}* @llvm.stacksave()
 // CHECK: [[BUFFER_ADDR:%.+]] = alloca [[INTPTR_T]], [[INTPTR_T]] [[SIZE]]
@@ -88,11 +88,11 @@ int main() {
 // CHECK: store [[INTPTR_T]] [[SIZE]], [[INTPTR_T]]* [[CAP_SIZE_REF]]
 // CHECK: [[CAP_BUFFER_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TYPE2]], [[CAP_TYPE2]]* [[CAP_ARG]], i{{.+}} 0, i{{.+}} 1
 // CHECK: store [[INTPTR_T]]* [[BUFFER_ADDR]], [[INTPTR_T]]** [[CAP_BUFFER_ADDR_REF]]
-// CHECK: call{{( x86_thiscallcc)?}} void [[F_INT_LAMBDA:@.+]]([[CAP_TYPE2]]* [[CAP_ARG]])
+// CHECK: call{{.*}} void [[F_INT_LAMBDA:@.+]]([[CAP_TYPE2]]* [[CAP_ARG]])
 // CHECK: call void @llvm.stackrestore(
 // CHECK: ret void
 // CHECK: void [[B_INT]]([[INTPTR_T]]
-// CHECK: [[SIZE1:%.+]] = call [[INTPTR_T]]
+// CHECK: [[SIZE1:%.+]] = call {{.*}}[[INTPTR_T]]
 // CHECK: call i{{.+}}* @llvm.stacksave()
 // CHECK: [[BUFFER2_ADDR:%.+]] = alloca [[INTPTR_T]], [[INTPTR_T]] [[SIZE1]]
 // CHECK: [[SIZE2:%.+]] = add
@@ -107,11 +107,11 @@ int main() {
 // CHECK: store [[INTPTR_T]]* [[BUFFER1_ADDR]], [[INTPTR_T]]** [[CAP_BUFFER1_ADDR_REF]]
 // CHECK: [[CAP_BUFFER2_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TYPE3]], [[CAP_TYPE3]]* [[CAP_ARG]], i{{.+}} 0, i{{.+}} 4
 // CHECK: store [[INTPTR_T]]* [[BUFFER2_ADDR]], [[INTPTR_T]]** [[CAP_BUFFER2_ADDR_REF]]
-// CHECK: call{{( x86_thiscallcc)?}} void [[B_INT_LAMBDA:@.+]]([[CAP_TYPE3]]* [[CAP_ARG]])
+// CHECK: call{{.*}} void [[B_INT_LAMBDA:@.+]]([[CAP_TYPE3]]* [[CAP_ARG]])
 // CHECK: call void @llvm.stackrestore(
 // CHECK: ret void
 
-// CHECK: define linkonce_odr{{( x86_thiscallcc)?}} void [[F_INT_LAMBDA]]([[CAP_TYPE2]]*
+// CHECK: define linkonce_odr{{.*}} void [[F_INT_LAMBDA]]([[CAP_TYPE2]]*
 // CHECK: [[THIS:%.+]] = load [[CAP_TYPE2]]*, [[CAP_TYPE2]]**
 // CHECK: [[SIZE_REF:%.+]] = getelementptr inbounds [[CAP_TYPE2]], [[CAP_TYPE2]]* [[THIS]], i{{.+}} 0, i{{.+}} 0
 // CHECK: [[SIZE:%.+]] = load [[INTPTR_T]], [[INTPTR_T]]* [[SIZE_REF]]
@@ -120,7 +120,7 @@ int main() {
 // CHECK: call void @llvm.stackrestore(
 // CHECK: ret void
 
-// CHECK: define linkonce_odr{{( x86_thiscallcc)?}} void [[B_INT_LAMBDA]]([[CAP_TYPE3]]*
+// CHECK: define linkonce_odr{{.*}} void [[B_INT_LAMBDA]]([[CAP_TYPE3]]*
 // CHECK: [[SIZE2_REF:%.+]] = getelementptr inbounds [[CAP_TYPE3]], [[CAP_TYPE3]]* [[THIS:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
 // CHECK: [[SIZE2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIZE2_REF]]
 // CHECK: [[SIZE1_REF:%.+]] = getelementptr inbounds [[CAP_TYPE3]], [[CAP_TYPE3]]* [[THIS]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
@@ -152,10 +152,10 @@ int main() {
 // CHECK: [[BUFFER1_ADDR_REF_ORIG:%.+]] = getelementptr inbounds [[CAP_TYPE3]], [[CAP_TYPE3]]* [[THIS]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
 // CHECK: [[BUFFER1_ADDR_ORIG:%.+]] = load [[INTPTR_T]]*, [[INTPTR_T]]** [[BUFFER1_ADDR_REF_ORIG]]
 // CHECK: store [[INTPTR_T]]* [[BUFFER1_ADDR_ORIG]], [[INTPTR_T]]** [[BUFFER1_ADDR_REF]]
-// CHECK: call{{( x86_thiscallcc)?}} void [[B_INT_LAMBDA_LAMBDA:@.+]]([[CAP_TYPE4]]* [[CAP]])
+// CHECK: call{{.*}} void [[B_INT_LAMBDA_LAMBDA:@.+]]([[CAP_TYPE4]]* [[CAP]])
 // CHECK: ret void
 
-// CHECK: define linkonce_odr{{( x86_thiscallcc)?}} void [[B_INT_LAMBDA_LAMBDA]]([[CAP_TYPE4]]*
+// CHECK: define linkonce_odr{{.*}} void [[B_INT_LAMBDA_LAMBDA]]([[CAP_TYPE4]]*
 // CHECK: [[SIZE1_REF:%.+]] = getelementptr inbounds [[CAP_TYPE4]], [[CAP_TYPE4]]* [[THIS:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
 // CHECK: [[SIZE1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIZE1_REF]]
 // CHECK: [[SIZE2_REF:%.+]] = getelementptr inbounds [[CAP_TYPE4]], [[CAP_TYPE4]]* [[THIS]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
diff --git a/test/CodeGenCXX/volatile-1.cpp b/test/CodeGenCXX/volatile-1.cpp
index 65eb9f6..f32e428 100644
--- a/test/CodeGenCXX/volatile-1.cpp
+++ b/test/CodeGenCXX/volatile-1.cpp
@@ -17,7 +17,7 @@ volatile struct S {
 int printf(const char *, ...);
 
 
-// CHECK: define void @{{.*}}test
+// CHECK: define {{.*}}void @{{.*}}test
 void test() {
 
   asm("nop"); // CHECK: call void asm
diff --git a/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp b/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
index bb9bd88..3392b32 100644
--- a/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
+++ b/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-nacl -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple=x86_64-unknown-linux-gnux32 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-nacl -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple=x86_64-unknown-linux-gnux32 -emit-llvm -o - %s | FileCheck %s
 
 struct test_struct {};
 typedef int test_struct::* test_struct_mdp;
@@ -42,3 +42,16 @@ struct struct_with_mfp_too_much {
 void f_struct_with_mfp_too_much(struct_with_mfp_too_much a, int x) {
   (void)a;
 }
+
+/* Struct containing an empty struct */
+typedef struct { int* a; test_struct x; double *b; } struct_with_empty;
+
+// CHECK-LABEL: define void @{{.*}}f_pass_struct_with_empty{{.*}}(i64 %x{{.*}}, double* %x
+void f_pass_struct_with_empty(struct_with_empty x) {
+  (void) x;
+}
+
+// CHECK-LABEL: define { i64, double* } @{{.*}}f_return_struct_with_empty
+struct_with_empty f_return_struct_with_empty() {
+  return {0, {}, 0};
+}
diff --git a/test/CodeGenObjC/arc-block-copy-escape.m b/test/CodeGenObjC/arc-block-copy-escape.m
index afe7c0b..16a75a0 100644
--- a/test/CodeGenObjC/arc-block-copy-escape.m
+++ b/test/CodeGenObjC/arc-block-copy-escape.m
@@ -8,15 +8,15 @@ void use_int(int);
 
 void test0(int i) {
   block_t block = ^{ use_int(i); };
-  // CHECK-LABEL:   define void @test0(
-  // CHECK:     call i8* @objc_retainBlock(i8* {{%.*}}) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape
+  // CHECK-LABEL:   define {{.*}}void @test0(
+  // CHECK:     call {{.*}}i8* @objc_retainBlock(i8* {{%.*}}) [[NUW:#[0-9]+]], !clang.arc.copy_on_escape
   // CHECK:     ret void
 }
 
 void test1(int i) {
   id block = ^{ use_int(i); };
-  // CHECK-LABEL:   define void @test1(
-  // CHECK:     call i8* @objc_retainBlock(i8* {{%.*}}) [[NUW]]
+  // CHECK-LABEL:   define {{.*}}void @test1(
+  // CHECK:     call {{.*}}i8* @objc_retainBlock(i8* {{%.*}}) [[NUW]]
   // CHECK-NOT: !clang.arc.copy_on_escape
   // CHECK:     ret void
 }
diff --git a/test/CodeGenObjC/objc2-legacy-dispatch.m b/test/CodeGenObjC/objc2-legacy-dispatch.m
index aa944f8..5584f49 100644
--- a/test/CodeGenObjC/objc2-legacy-dispatch.m
+++ b/test/CodeGenObjC/objc2-legacy-dispatch.m
@@ -7,9 +7,9 @@
 //
 // RUN: %clang_cc1 -fobjc-dispatch-method=legacy -emit-llvm -o - %s | FileCheck -check-prefix=CHECK_OLD_DISPATCH %s
 //
-// CHECK_OLD_DISPATCH-LABEL: define void @f0
+// CHECK_OLD_DISPATCH-LABEL: define {{.*}}void @f0
 // CHECK_OLD_DISPATCH: load {{.*}}OBJC_SELECTOR_REFERENCES
-// CHECK_OLD_DISPATCH-LABEL: define void @f1
+// CHECK_OLD_DISPATCH-LABEL: define {{.*}}void @f1
 // CHECK_OLD_DISPATCH: load {{.*}}OBJC_SELECTOR_REFERENCES
 
 @interface A
diff --git a/test/CodeGenObjC/related-result-type.m b/test/CodeGenObjC/related-result-type.m
index cd78474..cacf2c0 100644
--- a/test/CodeGenObjC/related-result-type.m
+++ b/test/CodeGenObjC/related-result-type.m
@@ -9,7 +9,7 @@
 @interface NSString : NSObject
 @end
 
-// CHECK-LABEL: define void @test1()
+// CHECK-LABEL: define {{.*}}void @test1()
 void test1() {
   // CHECK: {{call.*@objc_msgSend}}
   // CHECK: {{call.*@objc_msgSend}}
@@ -18,7 +18,7 @@ void test1() {
   NSString *str1 = [[[NSString alloc] init] retain];
 }
 
-// CHECK-LABEL: define void @test2()
+// CHECK-LABEL: define {{.*}}void @test2()
 void test2() {
   // CHECK: {{call.*@objc_msgSend}}
   // CHECK: {{call.*@objc_msgSend}}
@@ -32,7 +32,7 @@ void test2() {
 @end
 
 @implementation Test2
-// CHECK: define internal i8* @"\01-[Test2 init]"
+// CHECK: define internal {{.*}}i8* @"\01-[Test2 init]"
 - (id)init {
   // CHECK: {{call.*@objc_msgSendSuper}}
   // CHECK-NEXT: bitcast i8*
@@ -45,7 +45,7 @@ void test2() {
 @end
 
 @implementation Test3
-// CHECK: define internal i8* @"\01-[Test3 init]"
+// CHECK: define internal {{.*}}i8* @"\01-[Test3 init]"
 - (id)init {
   // CHECK: {{call.*@objc_msgSendSuper}}
   // CHECK-NEXT: bitcast i8*
diff --git a/test/CodeGenObjCXX/arc-mangle.mm b/test/CodeGenObjCXX/arc-mangle.mm
index b921a7f..a168d41 100644
--- a/test/CodeGenObjCXX/arc-mangle.mm
+++ b/test/CodeGenObjCXX/arc-mangle.mm
@@ -1,25 +1,25 @@
 // RUN: %clang_cc1 -fobjc-arc -fobjc-runtime-has-weak -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
 
-// CHECK-LABEL: define void @_Z1fPU8__strongP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPU8__strongP11objc_object(i8**)
 void f(__strong id *) {}
-// CHECK-LABEL: define void @_Z1fPU6__weakP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPU6__weakP11objc_object(i8**)
 void f(__weak id *) {}
-// CHECK-LABEL: define void @_Z1fPU15__autoreleasingP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPU15__autoreleasingP11objc_object(i8**)
 void f(__autoreleasing id *) {}
-// CHECK-LABEL: define void @_Z1fPP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPP11objc_object(i8**)
 void f(__unsafe_unretained id *) {}
-// CHECK-LABEL: define void @_Z1fPKU8__strongP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPKU8__strongP11objc_object(i8**)
 void f(const __strong id *) {}
-// CHECK-LABEL: define void @_Z1fPKU6__weakP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPKU6__weakP11objc_object(i8**)
 void f(const __weak id *) {}
-// CHECK-LABEL: define void @_Z1fPKU15__autoreleasingP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPKU15__autoreleasingP11objc_object(i8**)
 void f(const __autoreleasing id *) {}
-// CHECK-LABEL: define void @_Z1fPKP11objc_object(i8**)
+// CHECK-LABEL: define {{.*}}void @_Z1fPKP11objc_object(i8**)
 void f(const __unsafe_unretained id *) {}
 
 
 template<unsigned N> struct unsigned_c { };
 
-// CHECK-LABEL: define weak_odr void @_Z1gIKvEvP10unsigned_cIXplszv1U8__bridgecvPT_v1U8__bridgecvP11objc_objectcvS3_Li0ELi1EEE
+// CHECK-LABEL: define weak_odr {{.*}}void @_Z1gIKvEvP10unsigned_cIXplszv1U8__bridgecvPT_v1U8__bridgecvP11objc_objectcvS3_Li0ELi1EEE
 template<typename T>void g(unsigned_c<sizeof((__bridge T*)(__bridge id)(T*)0) + 1>*) {}
 template void g<const void>(unsigned_c<sizeof(id) + 1> *);
diff --git a/test/CodeGenObjCXX/debug-info-line.mm b/test/CodeGenObjCXX/debug-info-line.mm
index a7d6a15..6d7321c 100644
--- a/test/CodeGenObjCXX/debug-info-line.mm
+++ b/test/CodeGenObjCXX/debug-info-line.mm
@@ -15,7 +15,7 @@ void f1() {
   }();
 }
 
-// CHECK-LABEL: define internal i8* @"\01-[TNSObject init]"
+// CHECK-LABEL: define internal {{.*}}i8* @"\01-[TNSObject init]"
 @implementation TNSObject
 - (id)init
 {
diff --git a/test/CodeGenOpenCL/event_t.cl b/test/CodeGenOpenCL/event_t.cl
index ddf12a9..a84d8bb 100644
--- a/test/CodeGenOpenCL/event_t.cl
+++ b/test/CodeGenOpenCL/event_t.cl
@@ -6,7 +6,7 @@ void kernel ker() {
   event_t e;
 // CHECK: alloca %opencl.event_t*,
   foo(e);
-// CHECK: call void @foo(%opencl.event_t* %
+// CHECK: call {{.*}}void @foo(%opencl.event_t* %
   foo(0);
-// CHECK: call void @foo(%opencl.event_t* null)
+// CHECK: call {{.*}}void @foo(%opencl.event_t* null)
 }
diff --git a/test/CodeGenOpenCL/local.cl b/test/CodeGenOpenCL/local.cl
index f1031cd..da371f8 100644
--- a/test/CodeGenOpenCL/local.cl
+++ b/test/CodeGenOpenCL/local.cl
@@ -8,7 +8,7 @@ __kernel void foo(void) {
   func(&i);
 }
 
-// CHECK-LABEL: define void @_Z3barPU7CLlocali
+// CHECK-LABEL: define {{.*}}void @_Z3barPU7CLlocali
 __kernel void __attribute__((__overloadable__)) bar(local int *x) {
   *x = 5;
 }
diff --git a/test/CodeGenOpenCL/opencl_types.cl b/test/CodeGenOpenCL/opencl_types.cl
index ac4ed1c..5f4ebb8 100644
--- a/test/CodeGenOpenCL/opencl_types.cl
+++ b/test/CodeGenOpenCL/opencl_types.cl
@@ -22,7 +22,7 @@ void fnc3(image3d_t img) {}
 // CHECK: @fnc3(%opencl.image3d_t*
 
 void fnc4smp(sampler_t s) {}
-// CHECK-LABEL: define void @fnc4smp(i32
+// CHECK-LABEL: define {{.*}}void @fnc4smp(i32
 
 kernel void foo(image1d_t img) {
 	sampler_t smp = 5;
@@ -31,9 +31,9 @@ kernel void foo(image1d_t img) {
 // CHECK: alloca %opencl.event_t*
 // CHECK: store i32 5,
   fnc4smp(smp);
-// CHECK: call void @fnc4smp(i32
+// CHECK: call {{.*}}void @fnc4smp(i32
   fnc4smp(glb_smp);
-// CHECK: call void @fnc4smp(i32
+// CHECK: call {{.*}}void @fnc4smp(i32
 }
 
 void __attribute__((overloadable)) bad1(image1d_t *b, image2d_t *c, image2d_t *d) {}
diff --git a/test/CoverageMapping/implicit-def-in-macro.m b/test/CoverageMapping/implicit-def-in-macro.m
new file mode 100644
index 0000000..7e563ac
--- /dev/null
+++ b/test/CoverageMapping/implicit-def-in-macro.m
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -fprofile-instr-generate -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only -triple x86_64-apple-darwin -fobjc-runtime=macosx-10.10.0 -fblocks -fobjc-arc %s | FileCheck %s
+
+@interface Foo
+@end
+#define Bar Foo
+
+@implementation Blah
+// CHECK-LABEL: +[Blah load]:
++ load { // CHECK: File 0, [[@LINE]]:8 -> [[END:[0-9:]+]] = #0
+  return 0;
+  // CHECK: Expansion,File 0, [[@LINE+1]]:3 -> [[@LINE+1]]:6 = 0
+  Bar *attribute; // CHECK: File 0, [[@LINE]]:6 -> [[END]] = 0
+}
+@end
+
+int main() {}
diff --git a/test/CoverageMapping/ir.c b/test/CoverageMapping/ir.c
index 4b1238c..5ac3495 100644
--- a/test/CoverageMapping/ir.c
+++ b/test/CoverageMapping/ir.c
@@ -9,4 +9,4 @@ int main(void) {
   return 0;
 }
 
-// CHECK: @__llvm_coverage_mapping = internal constant { i32, i32, i32, i32, [2 x { i8*, i32, i32, i64 }], [{{[0-9]+}} x i8] } { i32 2, i32 {{[0-9]+}}, i32 {{[0-9]+}}, i32 0, [2 x { i8*, i32, i32, i64 }] [{ i8*, i32, i32, i64 } { i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i32 3, i32 9, i64 {{[0-9]+}} }, { i8*, i32, i32, i64 } { i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i32 4, i32 9, i64 {{[0-9]+}} }]
+// CHECK: @__llvm_coverage_mapping = internal constant { i32, i32, i32, i32, [2 x <{ i8*, i32, i32, i64 }>], [{{[0-9]+}} x i8] } { i32 2, i32 {{[0-9]+}}, i32 {{[0-9]+}}, i32 0, [2 x <{ i8*, i32, i32, i64 }>] [<{ i8*, i32, i32, i64 }> <{ i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i32 3, i32 9, i64 {{[0-9]+}} }>, <{ i8*, i32, i32, i64 }> <{ i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__llvm_profile_name_main, i32 0, i32 0), i32 4, i32 9, i64 {{[0-9]+}} }>]
diff --git a/test/Driver/Inputs/debian_multiarch_tree/lib/powerpc64le-linux-gnu/.keep b/test/Driver/Inputs/debian_multiarch_tree/lib/powerpc64le-linux-gnu/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/test/Driver/Inputs/debian_multiarch_tree/usr/lib/gcc/powerpc64le-linux-gnu/4.5/crtbegin.o b/test/Driver/Inputs/debian_multiarch_tree/usr/lib/gcc/powerpc64le-linux-gnu/4.5/crtbegin.o
new file mode 100644
index 0000000..e69de29
diff --git a/test/Driver/Inputs/debian_multiarch_tree/usr/lib/powerpc64le-linux-gnu/.keep b/test/Driver/Inputs/debian_multiarch_tree/usr/lib/powerpc64le-linux-gnu/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/test/Driver/arm-mfpu.c b/test/Driver/arm-mfpu.c
index 8439cdd..93fb0a8 100644
--- a/test/Driver/arm-mfpu.c
+++ b/test/Driver/arm-mfpu.c
@@ -34,6 +34,17 @@
 // CHECK-VFP3: "-target-feature" "-fp-armv8"
 // CHECK-VFP3: "-target-feature" "-neon"
 
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-fp16 %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-VFP3-FP16 %s
+// CHECK-VFP3-FP16: "-target-feature" "-fp-only-sp"
+// CHECK-VFP3-FP16: "-target-feature" "-d16"
+// CHECK-VFP3-FP16: "-target-feature" "+vfp3"
+// CHECK-VFP3-FP16: "-target-feature" "+fp16"
+// CHECK-VFP3-FP16: "-target-feature" "-vfp4"
+// CHECK-VFP3-FP16: "-target-feature" "-fp-armv8"
+// CHECK-VFP3-FP16: "-target-feature" "-neon"
+// CHECK-VFP3-FP16: "-target-feature" "-crypto"
+
 // RUN: %clang -target arm-linux-eabi -mfpu=vfp3-d16 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP3-D16 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16 %s -### -o %t.o 2>&1 \
@@ -45,6 +56,39 @@
 // CHECK-VFP3-D16: "-target-feature" "-fp-armv8"
 // CHECK-VFP3-D16: "-target-feature" "-neon"
 
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16-fp16 %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-VFP3-D16-FP16 %s
+// CHECK-VFP3-D16-FP16: "-target-feature" "-fp-only-sp"
+// CHECK-VFP3-D16-FP16: "-target-feature" "+d16"
+// CHECK-VFP3-D16-FP16: "-target-feature" "+vfp3"
+// CHECK-VFP3-D16-FP16: "-target-feature" "+fp16"
+// CHECK-VFP3-D16-FP16: "-target-feature" "-vfp4"
+// CHECK-VFP3-D16-FP16: "-target-feature" "-fp-armv8"
+// CHECK-VFP3-D16-FP16: "-target-feature" "-neon"
+// CHECK-VFP3-D16-FP16: "-target-feature" "-crypto"
+
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3xd %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-VFP3XD %s
+// CHECK-VFP3XD: "-target-feature" "+fp-only-sp"
+// CHECK-VFP3XD: "-target-feature" "+d16"
+// CHECK-VFP3XD: "-target-feature" "+vfp3"
+// CHECK-VFP3XD: "-target-feature" "-fp16"
+// CHECK-VFP3XD: "-target-feature" "-vfp4"
+// CHECK-VFP3XD: "-target-feature" "-fp-armv8"
+// CHECK-VFP3XD: "-target-feature" "-neon"
+// CHECK-VFP3XD: "-target-feature" "-crypto"
+
+// RUN: %clang -target arm-linux-eabi -mfpu=vfpv3xd-fp16 %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-VFP3XD-FP16 %s
+// CHECK-VFP3XD-FP16: "-target-feature" "+fp-only-sp"
+// CHECK-VFP3XD-FP16: "-target-feature" "+d16"
+// CHECK-VFP3XD-FP16: "-target-feature" "+vfp3"
+// CHECK-VFP3XD-FP16: "-target-feature" "+fp16"
+// CHECK-VFP3XD-FP16: "-target-feature" "-vfp4"
+// CHECK-VFP3XD-FP16: "-target-feature" "-fp-armv8"
+// CHECK-VFP3XD-FP16: "-target-feature" "-neon"
+// CHECK-VFP3XD-FP16: "-target-feature" "-crypto"
+
 // RUN: %clang -target arm-linux-eabi -mfpu=vfp4 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-VFP4 %s
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv4 %s -### -o %t.o 2>&1 \
@@ -97,6 +141,17 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NEON %s
 // CHECK-NEON: "-target-feature" "+neon"
 
+// RUN: %clang -target arm-linux-eabi -mfpu=neon-fp16 %s -### -o %t.o 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NEON-FP16 %s
+// CHECK-NEON-FP16: "-target-feature" "-fp-only-sp"
+// CHECK-NEON-FP16: "-target-feature" "-d16"
+// CHECK-NEON-FP16: "-target-feature" "+vfp3"
+// CHECK-NEON-FP16: "-target-feature" "+fp16"
+// CHECK-NEON-FP16: "-target-feature" "-vfp4"
+// CHECK-NEON-FP16: "-target-feature" "-fp-armv8"
+// CHECK-NEON-FP16: "-target-feature" "+neon"
+// CHECK-NEON-FP16: "-target-feature" "-crypto"
+
 // RUN: %clang -target arm-linux-eabi -mfpu=neon-vfpv3 %s -### -o %t.o 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NEON-VFPV3 %s
 // CHECK-NEON-VFPV3: "-target-feature" "+neon"
diff --git a/test/Driver/bindings.c b/test/Driver/bindings.c
index dd8ad14..880667b 100644
--- a/test/Driver/bindings.c
+++ b/test/Driver/bindings.c
@@ -1,8 +1,8 @@
 // Basic binding.
 // RUN: %clang -target i386-unknown-unknown -ccc-print-bindings -no-integrated-as %s 2>&1 | FileCheck %s --check-prefix=CHECK01
 // CHECK01: "clang", inputs: ["{{.*}}bindings.c"], output: "{{.*}}.s"
-// CHECK01: "GNU::Assemble", inputs: ["{{.*}}.s"], output: "{{.*}}.o"
-// CHECK01: "gcc::Link", inputs: ["{{.*}}.o"], output: "a.out"
+// CHECK01: "GNU::Assembler", inputs: ["{{.*}}.s"], output: "{{.*}}.o"
+// CHECK01: "gcc::Linker", inputs: ["{{.*}}.o"], output: "a.out"
 
 // Clang control options
 
@@ -21,5 +21,5 @@
 // Darwin bindings
 // RUN: %clang -target i386-apple-darwin9 -no-integrated-as -ccc-print-bindings %s 2>&1 | FileCheck %s --check-prefix=CHECK14
 // CHECK14: "clang", inputs: ["{{.*}}bindings.c"], output: "{{.*}}.s"
-// CHECK14: "darwin::Assemble", inputs: ["{{.*}}.s"], output: "{{.*}}.o"
-// CHECK14: "darwin::Link", inputs: ["{{.*}}.o"], output: "a.out"
+// CHECK14: "darwin::Assembler", inputs: ["{{.*}}.s"], output: "{{.*}}.o"
+// CHECK14: "darwin::Linker", inputs: ["{{.*}}.o"], output: "a.out"
diff --git a/test/Driver/cl-link.c b/test/Driver/cl-link.c
index 5bd2001..8572b77 100644
--- a/test/Driver/cl-link.c
+++ b/test/Driver/cl-link.c
@@ -11,7 +11,7 @@
 // LINK: "bar"
 // LINK: "baz"
 
-// RUN: %clang_cl /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s
 // ASAN: link.exe
 // ASAN: "-debug"
 // ASAN: "-incremental:no"
@@ -19,7 +19,7 @@
 // ASAN: "{{.*}}clang_rt.asan_cxx-i386.lib"
 // ASAN: "{{.*}}cl-link{{.*}}.obj"
 
-// RUN: %clang_cl /MD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
 // ASAN-MD: link.exe
 // ASAN-MD: "-debug"
 // ASAN-MD: "-incremental:no"
@@ -33,8 +33,8 @@
 // DLL: link.exe
 // "-dll"
 
-// RUN: %clang_cl /LD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
-// RUN: %clang_cl /LDd /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
 // ASAN-DLL: link.exe
 // ASAN-DLL: "-dll"
 // ASAN-DLL: "-debug"
diff --git a/test/Driver/cl-x86-flags.c b/test/Driver/cl-x86-flags.c
index 5aae4c4..d4f7fb5 100644
--- a/test/Driver/cl-x86-flags.c
+++ b/test/Driver/cl-x86-flags.c
@@ -8,10 +8,10 @@
 // RUN:     --target=i386-pc-win32 -### -- 2>&1 %s | FileCheck -check-prefix=MFLAGS %s
 // MFLAGS-NOT: argument unused during compilation
 
-// -arch:IA32 is no-op.
 // RUN: %clang_cl -m32 -arch:IA32 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=IA32 %s
-// IA32-NOT: argument unused during compilation
+// IA32: "-target-cpu" "i386"
 // IA32-NOT: -target-feature
+// IA32-NOT: argument unused during compilation
 
 // RUN: %clang_cl -m32 -arch:ia32 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=ia32 %s
 // ia32: argument unused during compilation
@@ -22,6 +22,7 @@
 // IA3264-NOT: -target-feature
 
 // RUN: %clang_cl -m32 -arch:SSE --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=SSE %s
+// SSE: "-target-cpu" "pentium3"
 // SSE: -target-feature
 // SSE: +sse
 // SSE-NOT: argument unused during compilation
@@ -31,6 +32,7 @@
 // sse-NOT: -target-feature
 
 // RUN: %clang_cl -m32 -arch:SSE2 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=SSE2 %s
+// SSE2: "-target-cpu" "pentium4"
 // SSE2: -target-feature
 // SSE2: +sse2
 // SSE2-NOT: argument unused during compilation
@@ -42,12 +44,14 @@
 // RUN: %clang_cl -m64 -arch:SSE --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=SSE64 %s
 // SSE64: argument unused during compilation
 // SSE64-NOT: -target-feature
+// SSE64-NOT: pentium3
 
 // RUN: %clang_cl -m64 -arch:SSE2 --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=SSE264 %s
 // SSE264: argument unused during compilation
 // SSE264-NOT: -target-feature
 
 // RUN: %clang_cl -m32 -arch:AVX --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=AVX %s
+// AVX: "-target-cpu" "sandybridge"
 // AVX: -target-feature
 // AVX: +avx
 
@@ -56,6 +60,7 @@
 // avx-NOT: -target-feature
 
 // RUN: %clang_cl -m32 -arch:AVX2 --target=i386 -### -- 2>&1 %s | FileCheck -check-prefix=AVX2 %s
+// AVX2: "-target-cpu" "haswell"
 // AVX2: -target-feature
 // AVX2: +avx2
 
@@ -64,6 +69,7 @@
 // avx2-NOT: -target-feature
 
 // RUN: %clang_cl -m64 -arch:AVX --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=AVX64 %s
+// AVX64: "-target-cpu" "sandybridge"
 // AVX64: -target-feature
 // AVX64: +avx
 
@@ -72,6 +78,7 @@
 // avx64-NOT: -target-feature
 
 // RUN: %clang_cl -m64 -arch:AVX2 --target=x86_64 -### -- 2>&1 %s | FileCheck -check-prefix=AVX264 %s
+// AVX264: "-target-cpu" "haswell"
 // AVX264: -target-feature
 // AVX264: +avx2
 
diff --git a/test/Driver/darwin-debug-flags.c b/test/Driver/darwin-debug-flags.c
index 17b0bba..5080a59e 100644
--- a/test/Driver/darwin-debug-flags.c
+++ b/test/Driver/darwin-debug-flags.c
@@ -5,7 +5,7 @@
 // <rdar://problem/12955296>
 // RUN: %clang -### -target i386-apple-darwin9 -c -g %t.s 2>&1 | FileCheck -check-prefix=P %s
 
-// CHECK: !0 = !DICompileUnit(
+// CHECK: !0 = distinct !DICompileUnit(
 // CHECK-SAME:                flags:
 // CHECK-SAME:                -I path\5C with\5C \5C\5Cspaces
 // CHECK-SAME:                -g -Os
diff --git a/test/Driver/darwin-dsymutil.c b/test/Driver/darwin-dsymutil.c
index 0e2c490..09451a8 100644
--- a/test/Driver/darwin-dsymutil.c
+++ b/test/Driver/darwin-dsymutil.c
@@ -28,7 +28,7 @@
 // RUN:   -o foo %s -g 2> %t
 // RUN: FileCheck -check-prefix=CHECK-OUTPUT-NAME < %t %s
 //
-// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Link", inputs: [{{.*}}], output: "foo"
+// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Linker", inputs: [{{.*}}], output: "foo"
 // CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["foo"], output: "foo.dSYM"
 
 // Check that we only use dsymutil when needed.
diff --git a/test/Driver/darwin-sdkroot.c b/test/Driver/darwin-sdkroot.c
index 58bc683..3d2a413 100644
--- a/test/Driver/darwin-sdkroot.c
+++ b/test/Driver/darwin-sdkroot.c
@@ -40,3 +40,39 @@
 //   env SDKROOT=/ cmd //c echo %SDKROOT%
 //
 // This test passes using env.exe from GnuWin32.
+
+// Check if clang set the correct deployment target from -sysroot
+//
+// RUN: rm -rf %t/SDKs/iPhoneOS8.0.0.sdk
+// RUN: mkdir -p %t/SDKs/iPhoneOS8.0.0.sdk
+// RUN: env SDKROOT=%t/SDKs/iPhoneOS8.0.0.sdk %clang -target arm64-apple-darwin %s -### 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-IPHONE %s
+//
+// CHECK-IPHONE: clang
+// CHECK-IPHONE: "-cc1"
+// CHECK-IPHONE: "-triple" "arm64-apple-ios8.0.0"
+// CHECK-IPHONE: ld
+// CHECK-IPHONE: "-iphoneos_version_min" "8.0.0"
+//
+//
+// RUN: rm -rf %t/SDKs/iPhoneSimulator8.0.sdk
+// RUN: mkdir -p %t/SDKs/iPhoneSimulator8.0.sdk
+// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator8.0.sdk %clang -target x86_64-apple-darwin %s -### 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SIMULATOR %s
+//
+// CHECK-SIMULATOR: clang
+// CHECK-SIMULATOR: "-cc1"
+// CHECK-SIMULATOR: "-triple" "x86_64-apple-ios8.0.0"
+// CHECK-SIMULATOR: ld
+// CHECK-SIMULATOR: "-ios_simulator_version_min" "8.0.0"
+//
+// RUN: rm -rf %t/SDKs/MacOSX10.10.0.sdk
+// RUN: mkdir -p %t/SDKs/MacOSX10.10.0.sdk
+// RUN: env SDKROOT=%t/SDKs/MacOSX10.10.0.sdk %clang -target x86_64-apple-darwin %s -### 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-MACOSX %s
+//
+// CHECK-MACOSX: clang
+// CHECK-MACOSX: "-cc1"
+// CHECK-MACOSX: "-triple" "x86_64-apple-macosx10.10.0"
+// CHECK-MACOSX: ld
+// CHECK-MACOSX: "-macosx_version_min" "10.10.0"
diff --git a/test/Driver/darwin-verify-debug.c b/test/Driver/darwin-verify-debug.c
index ebbf89a..6b91290 100644
--- a/test/Driver/darwin-verify-debug.c
+++ b/test/Driver/darwin-verify-debug.c
@@ -21,7 +21,7 @@
 // RUN:   --verify-debug-info -o foo %s -g 2> %t
 // RUN: FileCheck -check-prefix=CHECK-OUTPUT-NAME < %t %s
 //
-// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Link", inputs: [{{.*}}], output: "foo"
+// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Linker", inputs: [{{.*}}], output: "foo"
 // CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["foo"], output: "foo.dSYM"
 // CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::VerifyDebug", inputs: ["foo.dSYM"], output: (nothing)
 
diff --git a/test/Driver/fsanitize.c b/test/Driver/fsanitize.c
index bac12dc..96d0781 100644
--- a/test/Driver/fsanitize.c
+++ b/test/Driver/fsanitize.c
@@ -11,11 +11,20 @@
 // CHECK-UNDEFINED: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|function|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|vptr|object-size|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){19}"}}
 
 // RUN: %clang -target x86_64-apple-darwin10 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-DARWIN
-// CHECK-UNDEFINED-DARWIN: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|vptr|object-size|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){18}"}}
+// CHECK-UNDEFINED-DARWIN: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
 
 // RUN: %clang -target i386-unknown-openbsd -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-OPENBSD
 // CHECK-UNDEFINED-OPENBSD: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
 
+// RUN: %clang -target i386-pc-win32 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN32
+// RUN: %clang -target i386-pc-win32 -fsanitize=undefined -x c++ %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN32 --check-prefix=CHECK-UNDEFINED-WIN-CXX
+// RUN: %clang -target x86_64-pc-win32 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN64
+// RUN: %clang -target x86_64-pc-win32 -fsanitize=undefined -x c++ %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-WIN --check-prefix=CHECK-UNDEFINED-WIN64 --check-prefix=CHECK-UNDEFINED-WIN-CXX
+// CHECK-UNDEFINED-WIN: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|float-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|object-size|float-cast-overflow|array-bounds|enum|bool|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
+// CHECK-UNDEFINED-WIN32-SAME: "--dependent-lib={{[^"]*}}ubsan_standalone-i386.lib"
+// CHECK-UNDEFINED-WIN64-SAME: "--dependent-lib={{[^"]*}}ubsan_standalone-x86_64.lib"
+// CHECK-UNDEFINED-WIN-CXX-SAME: "--dependent-lib={{[^"]*}}ubsan_standalone_cxx{{[^"]*}}.lib"
+
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER
 // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent),?){5}"}}
 
@@ -205,6 +214,12 @@
 // RUN: %clang -target x86_64-apple-darwin10 -fsanitize=function -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FSAN-UBSAN-DARWIN
 // CHECK-FSAN-UBSAN-DARWIN: unsupported option '-fsanitize=function' for target 'x86_64-apple-darwin10'
 
+// RUN: %clang -target x86_64-apple-darwin10 -mmacosx-version-min=10.8 -fsanitize=vptr %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-DARWIN-OLD
+// CHECK-VPTR-DARWIN-OLD: unsupported option '-fsanitize=vptr' for target 'x86_64-apple-darwin10'
+
+// RUN: %clang -target x86_64-apple-darwin10 -mmacosx-version-min=10.9 -fsanitize=alignment,vptr %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-DARWIN-NEW
+// CHECK-VPTR-DARWIN-NEW: -fsanitize=alignment,vptr
+
 // RUN: %clang -target armv7-apple-ios7 -miphoneos-version-min=7.0 -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-IOS
 // CHECK-ASAN-IOS: unsupported option '-fsanitize=address' for target 'arm-apple-ios7'
 
@@ -228,6 +243,9 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-trap=address -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-TRAP
 // CHECK-ASAN-TRAP: error: unsupported argument 'address' to option '-fsanitize-trap'
 
+// RUN: %clang -target x86_64-apple-darwin10 -mmacosx-version-min=10.7 -flto -fsanitize=cfi-vcall -fno-sanitize-trap=cfi -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI-NOTRAP-OLD-MACOS
+// CHECK-CFI-NOTRAP-OLD-MACOS: error: unsupported option '-fno-sanitize-trap=cfi-vcall' for target 'x86_64-apple-darwin10'
+
 // RUN: %clang_cl -fsanitize=address -c -MDd -### -- %s 2>&1 | FileCheck %s -check-prefix=CHECK-ASAN-DEBUGRTL
 // RUN: %clang_cl -fsanitize=address -c -MTd -### -- %s 2>&1 | FileCheck %s -check-prefix=CHECK-ASAN-DEBUGRTL
 // RUN: %clang_cl -fsanitize=address -c -LDd -### -- %s 2>&1 | FileCheck %s -check-prefix=CHECK-ASAN-DEBUGRTL
@@ -257,3 +275,7 @@
 // SP: "-fsanitize=safe-stack"
 // SP-ASAN-NOT: stack-protector
 // SP-ASAN: "-fsanitize=address,safe-stack"
+
+// RUN: %clang -target powerpc64-unknown-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-SANM
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -fsanitize=memory %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-SANM
+// CHECK-SANM: "-fsanitize=memory"
diff --git a/test/Driver/integrated-as.s b/test/Driver/integrated-as.s
index 9a7d2c5..181f790 100644
--- a/test/Driver/integrated-as.s
+++ b/test/Driver/integrated-as.s
@@ -43,3 +43,6 @@
 
 // RUN: %clang -### -c -integrated-as %s -Wa,-gdwarf-2 2>&1 | FileCheck --check-prefix=DWARF2WA %s
 // DWARF2WA: "-gdwarf-2"
+
+// RUN: %clang -### -x assembler -c -integrated-as %s -I myincludedir 2>&1 | FileCheck --check-prefix=INCLUDEPATH %s
+// INCLUDEPATH: "-I" "myincludedir"
diff --git a/test/Driver/ios-version-min.c b/test/Driver/ios-version-min.c
new file mode 100644
index 0000000..aa536cf7
--- /dev/null
+++ b/test/Driver/ios-version-min.c
@@ -0,0 +1,7 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: arm-registered-target
+// RUN: %clang -target i386-apple-darwin10 -miphonesimulator-version-min=7.0 -arch i386 -S -o - %s | FileCheck %s
+// RUN: %clang -target armv7s-apple-darwin10 -miphoneos-version-min=7.0 -arch armv7s -S -o - %s | FileCheck %s
+
+int main() { return 0; }
+// CHECK: .ios_version_min 7, 0
diff --git a/test/Driver/linux-ld.c b/test/Driver/linux-ld.c
index 5c4778b..5e865a9 100644
--- a/test/Driver/linux-ld.c
+++ b/test/Driver/linux-ld.c
@@ -817,6 +817,19 @@
 // CHECK-DEBIAN-PPC: "-L[[SYSROOT]]/lib"
 // CHECK-DEBIAN-PPC: "-L[[SYSROOT]]/usr/lib"
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN:     --target=powerpc64le-linux-gnu \
+// RUN:     --gcc-toolchain="" \
+// RUN:     --sysroot=%S/Inputs/debian_multiarch_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-DEBIAN-PPC64LE %s
+// CHECK-DEBIAN-PPC64LE: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]"
+// CHECK-DEBIAN-PPC64LE: "{{.*}}/usr/lib/gcc/powerpc64le-linux-gnu/4.5{{/|\\\\}}crtbegin.o"
+// CHECK-DEBIAN-PPC64LE: "-L[[SYSROOT]]/usr/lib/gcc/powerpc64le-linux-gnu/4.5"
+// CHECK-DEBIAN-PPC64LE: "-L[[SYSROOT]]/usr/lib/gcc/powerpc64le-linux-gnu/4.5/../../../powerpc64le-linux-gnu"
+// CHECK-DEBIAN-PPC64LE: "-L[[SYSROOT]]/usr/lib/powerpc64le-linux-gnu"
+// CHECK-DEBIAN-PPC64LE: "-L[[SYSROOT]]/usr/lib/gcc/powerpc64le-linux-gnu/4.5/../../.."
+// CHECK-DEBIAN-PPC64LE: "-L[[SYSROOT]]/lib"
+// CHECK-DEBIAN-PPC64LE: "-L[[SYSROOT]]/usr/lib"
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     --target=powerpc64-linux-gnu \
 // RUN:     --gcc-toolchain="" \
 // RUN:     --sysroot=%S/Inputs/debian_multiarch_tree \
diff --git a/test/Driver/lit.local.cfg b/test/Driver/lit.local.cfg
index af6d021..6c2373b 100644
--- a/test/Driver/lit.local.cfg
+++ b/test/Driver/lit.local.cfg
@@ -4,3 +4,15 @@ config.substitutions = list(config.substitutions)
 config.substitutions.insert(0,
     ('%clang_cc1',
      """*** Do not use 'clang -cc1' in Driver tests. ***""") )
+
+# Remove harmful environmental variables for clang Driver tests.
+# Some might be useful for other tests so they are only removed here.
+driver_overwrite_env_vars = ['MACOSX_DEPLOYMENT_TARGET',
+                             'IPHONEOS_DEPLOYMENT_TARGET',
+                             'SDKROOT', 'CCC_OVERRIDE_OPTIONS',
+                             'CC_PRINT_OPTIONS', 'CC_PRINT_HEADERS',
+                             'CC_LOG_DIAGNOSTICS']
+
+for name in driver_overwrite_env_vars:
+  if name in config.environment:
+    del config.environment[name]
diff --git a/test/Driver/msan.c b/test/Driver/msan.c
new file mode 100644
index 0000000..22f7471
--- /dev/null
+++ b/test/Driver/msan.c
@@ -0,0 +1,12 @@
+// RUN: %clang     -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O1 -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O2 -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O3 -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -target mips64-linux-gnu -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -target mips64el-unknown-linux-gnu -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -target powerpc64-unknown-linux-gnu -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s
+// Verify that -fsanitize=memory invokes msan instrumentation.
+
+int foo(int *a) { return *a; }
+// CHECK: __msan_init
diff --git a/test/Driver/pic.c b/test/Driver/pic.c
index a515f81..120e66a 100644
--- a/test/Driver/pic.c
+++ b/test/Driver/pic.c
@@ -242,3 +242,9 @@
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
 // RUN: %clang -c %s -target arm64-linux-android -### 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-PIC1
+//
+// On Windows-X64 PIC is enabled by default
+// RUN: %clang -c %s -target x86_64-pc-windows-msvc18.0.0 -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
+// RUN: %clang -c %s -target x86_64-pc-windows-gnu -### 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-PIC2
diff --git a/test/FixIt/fixit-nullability-declspec.cpp b/test/FixIt/fixit-nullability-declspec.cpp
index 2ac20b9..17989c3 100644
--- a/test/FixIt/fixit-nullability-declspec.cpp
+++ b/test/FixIt/fixit-nullability-declspec.cpp
@@ -4,6 +4,6 @@
 // RUN: not %clang_cc1 -fixit -fblocks -Werror=nullability-declspec -x c++ %t
 // RUN: %clang_cc1 -fblocks -Werror=nullability-declspec -x c++ %t
 
-__nullable int *ip1; // expected-error{{nullability specifier '__nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the pointer?}}
-__nullable int (*fp1)(int); // expected-error{{nullability specifier '__nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the function pointer?}}
-__nonnull int (^bp1)(int); // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the block pointer?}}
+_Nullable int *ip1; // expected-error{{nullability specifier '_Nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the pointer?}}
+_Nullable int (*fp1)(int); // expected-error{{nullability specifier '_Nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the function pointer?}}
+_Nonnull int (^bp1)(int); // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the block pointer?}}
diff --git a/test/Index/annotate-literals.m b/test/Index/annotate-literals.m
index 20bfd2c..a1e4fb2 100644
--- a/test/Index/annotate-literals.m
+++ b/test/Index/annotate-literals.m
@@ -29,44 +29,61 @@ typedef unsigned char BOOL;
 + (id)dictionaryWithObjects:(const id [])objects forKeys:(const id [])keys count:(unsigned long)cnt;
 @end
 
-void test_literals(id k1, id o1, id k2, id o2, id k3) {
+@interface NSValue
++ (NSValue *)valueWithBytes:(const void *)value objCType:(const char *)type;
+@end
+
+typedef struct __attribute__((objc_boxable)) _c_struct {
+  int dummy;
+} c_struct;
+
+void test_literals(id k1, id o1, id k2, id o2, id k3, c_struct s) {
   id objects = @[ o1, o2 ];
   id dict = @{ k1 : o1,
                k2 : o2,
                k3 : @17 };
+  id val = @(s);
 }
 
 
-// RUN: c-index-test -test-annotate-tokens=%s:33:1:37:1 %s | FileCheck -check-prefix=CHECK-LITERALS %s
+// RUN: c-index-test -test-annotate-tokens=%s:41:1:46:1 %s | FileCheck -check-prefix=CHECK-LITERALS %s
 
-// CHECK-LITERALS: Identifier: "id" [33:3 - 33:5] TypeRef=id:0:0
-// CHECK-LITERALS: Identifier: "objects" [33:6 - 33:13] VarDecl=objects:33:6 (Definition)
-// CHECK-LITERALS: Punctuation: "=" [33:14 - 33:15] VarDecl=objects:33:6 (Definition)
-// CHECK-LITERALS: Punctuation: "@" [33:16 - 33:17] UnexposedExpr=
-// CHECK-LITERALS: Punctuation: "[" [33:17 - 33:18] UnexposedExpr=
-// CHECK-LITERALS: Identifier: "o1" [33:19 - 33:21] DeclRefExpr=o1:32:30
-// CHECK-LITERALS: Punctuation: "," [33:21 - 33:22] UnexposedExpr=
-// CHECK-LITERALS: Identifier: "o2" [33:23 - 33:25] DeclRefExpr=o2:32:44
-// CHECK-LITERALS: Punctuation: "]" [33:26 - 33:27] UnexposedExpr=
-// CHECK-LITERALS: Punctuation: ";" [33:27 - 33:28] DeclStmt=
-// CHECK-LITERALS: Identifier: "id" [34:3 - 34:5] TypeRef=id:0:0
-// CHECK-LITERALS: Identifier: "dict" [34:6 - 34:10] VarDecl=dict:34:6 (Definition)
-// CHECK-LITERALS: Punctuation: "=" [34:11 - 34:12] VarDecl=dict:34:6 (Definition)
-// CHECK-LITERALS: Punctuation: "@" [34:13 - 34:14] UnexposedExpr=
-// CHECK-LITERALS: Punctuation: "{" [34:14 - 34:15] UnexposedExpr=
-// CHECK-LITERALS: Identifier: "k1" [34:16 - 34:18] DeclRefExpr=k1:32:23
-// CHECK-LITERALS: Punctuation: ":" [34:19 - 34:20] UnexposedExpr=
-// CHECK-LITERALS: Identifier: "o1" [34:21 - 34:23] DeclRefExpr=o1:32:30
-// CHECK-LITERALS: Punctuation: "," [34:23 - 34:24] UnexposedExpr=
-// CHECK-LITERALS: Identifier: "k2" [35:16 - 35:18] DeclRefExpr=k2:32:37
-// CHECK-LITERALS: Punctuation: ":" [35:19 - 35:20] UnexposedExpr=
-// CHECK-LITERALS: Identifier: "o2" [35:21 - 35:23] DeclRefExpr=o2:32:44
-// CHECK-LITERALS: Punctuation: "," [35:23 - 35:24] UnexposedExpr=
-// CHECK-LITERALS: Identifier: "k3" [36:16 - 36:18] DeclRefExpr=k3:32:51
-// CHECK-LITERALS: Punctuation: ":" [36:19 - 36:20] UnexposedExpr=
-// CHECK-LITERALS: Punctuation: "@" [36:21 - 36:22] UnexposedExpr=
-// CHECK-LITERALS: Literal: "17" [36:22 - 36:24] IntegerLiteral=
-// CHECK-LITERALS: Punctuation: "}" [36:25 - 36:26] UnexposedExpr=
-// CHECK-LITERALS: Punctuation: ";" [36:26 - 36:27] DeclStmt=
-// CHECK-LITERALS: Punctuation: "}" [37:1 - 37:2] CompoundStmt=
+// CHECK-LITERALS: Identifier: "id" [41:3 - 41:5] TypeRef=id:0:0
+// CHECK-LITERALS: Identifier: "objects" [41:6 - 41:13] VarDecl=objects:41:6 (Definition)
+// CHECK-LITERALS: Punctuation: "=" [41:14 - 41:15] VarDecl=objects:41:6 (Definition)
+// CHECK-LITERALS: Punctuation: "@" [41:16 - 41:17] UnexposedExpr=
+// CHECK-LITERALS: Punctuation: "[" [41:17 - 41:18] UnexposedExpr=
+// CHECK-LITERALS: Identifier: "o1" [41:19 - 41:21] DeclRefExpr=o1:40:30
+// CHECK-LITERALS: Punctuation: "," [41:21 - 41:22] UnexposedExpr=
+// CHECK-LITERALS: Identifier: "o2" [41:23 - 41:25] DeclRefExpr=o2:40:44
+// CHECK-LITERALS: Punctuation: "]" [41:26 - 41:27] UnexposedExpr=
+// CHECK-LITERALS: Punctuation: ";" [41:27 - 41:28] DeclStmt=
+// CHECK-LITERALS: Identifier: "id" [42:3 - 42:5] TypeRef=id:0:0
+// CHECK-LITERALS: Identifier: "dict" [42:6 - 42:10] VarDecl=dict:42:6 (Definition)
+// CHECK-LITERALS: Punctuation: "=" [42:11 - 42:12] VarDecl=dict:42:6 (Definition)
+// CHECK-LITERALS: Punctuation: "@" [42:13 - 42:14] UnexposedExpr=
+// CHECK-LITERALS: Punctuation: "{" [42:14 - 42:15] UnexposedExpr=
+// CHECK-LITERALS: Identifier: "k1" [42:16 - 42:18] DeclRefExpr=k1:40:23
+// CHECK-LITERALS: Punctuation: ":" [42:19 - 42:20] UnexposedExpr=
+// CHECK-LITERALS: Identifier: "o1" [42:21 - 42:23] DeclRefExpr=o1:40:30
+// CHECK-LITERALS: Punctuation: "," [42:23 - 42:24] UnexposedExpr=
+// CHECK-LITERALS: Identifier: "k2" [43:16 - 43:18] DeclRefExpr=k2:40:37
+// CHECK-LITERALS: Punctuation: ":" [43:19 - 43:20] UnexposedExpr=
+// CHECK-LITERALS: Identifier: "o2" [43:21 - 43:23] DeclRefExpr=o2:40:44
+// CHECK-LITERALS: Punctuation: "," [43:23 - 43:24] UnexposedExpr=
+// CHECK-LITERALS: Identifier: "k3" [44:16 - 44:18] DeclRefExpr=k3:40:51
+// CHECK-LITERALS: Punctuation: ":" [44:19 - 44:20] UnexposedExpr=
+// CHECK-LITERALS: Punctuation: "@" [44:21 - 44:22] UnexposedExpr=
+// CHECK-LITERALS: Literal: "17" [44:22 - 44:24] IntegerLiteral=
+// CHECK-LITERALS: Punctuation: "}" [44:25 - 44:26] UnexposedExpr=
+// CHECK-LITERALS: Punctuation: ";" [44:26 - 44:27] DeclStmt=
+// CHECK-LITERALS: Identifier: "id" [45:3 - 45:5] TypeRef=id:0:0
+// CHECK-LITERALS: Identifier: "val" [45:6 - 45:9] VarDecl=val:45:6 (Definition)
+// CHECK-LITERALS: Punctuation: "=" [45:10 - 45:11] VarDecl=val:45:6 (Definition)
+// CHECK-LITERALS: Punctuation: "@" [45:12 - 45:13] UnexposedExpr=
+// CHECK-LITERALS: Punctuation: "(" [45:13 - 45:14] ParenExpr=
+// CHECK-LITERALS: Identifier: "s" [45:14 - 45:15] DeclRefExpr=s:40:64
+// CHECK-LITERALS: Punctuation: ")" [45:15 - 45:16] ParenExpr=
+// CHECK-LITERALS: Punctuation: ";" [45:16 - 45:17] DeclStmt=
+// CHECK-LITERALS: Punctuation: "}" [46:1 - 46:2] CompoundStmt=
 
diff --git a/test/Index/complete-objc-message.m b/test/Index/complete-objc-message.m
index a62b491..193f1f8 100644
--- a/test/Index/complete-objc-message.m
+++ b/test/Index/complete-objc-message.m
@@ -345,4 +345,4 @@ void test_Nullability(Nullability *n, A* a) {
 // CHECK-DISTRIB-OBJECTS: ObjCInstanceMethodDecl:{ResultType void}{TypedText method:}{Placeholder (in bycopy A *)}{HorizontalSpace  }{TypedText result:}{Placeholder (out byref A **)} (35)
 
 // RUN: c-index-test -code-completion-at=%s:197:6 %s | FileCheck -check-prefix=CHECK-NULLABLE %s
-// CHECK-NULLABLE: ObjCInstanceMethodDecl:{ResultType A * __nonnull}{TypedText method:}{Placeholder (nullable A *)}
+// CHECK-NULLABLE: ObjCInstanceMethodDecl:{ResultType A * _Nonnull}{TypedText method:}{Placeholder (nullable A *)}
diff --git a/test/Index/complete-stmt.c b/test/Index/complete-stmt.c
index 8bbfe2d..0deb4d3 100644
--- a/test/Index/complete-stmt.c
+++ b/test/Index/complete-stmt.c
@@ -16,8 +16,8 @@ void f(int x) {
 // CHECK-IF-ELSE-SIMPLE: NotImplemented:{TypedText else}{HorizontalSpace  }{Text if}{HorizontalSpace  }{LeftParen (}{Placeholder expression}{RightParen )} (40)
 
 // RUN: c-index-test -code-completion-at=%s:6:1 %s | FileCheck -check-prefix=CHECK-STMT %s
-// CHECK-STMT: NotImplemented:{TypedText __nonnull} (50)
-// CHECK-STMT: NotImplemented:{TypedText __nullable} (50)
+// CHECK-STMT: NotImplemented:{TypedText _Nonnull} (50)
+// CHECK-STMT: NotImplemented:{TypedText _Nullable} (50)
 // CHECK-STMT: NotImplemented:{TypedText char} (50)
 // CHECK-STMT: NotImplemented:{TypedText const} (50)
 // CHECK-STMT: NotImplemented:{TypedText double} (50)
diff --git a/test/Modules/Inputs/DebugModule.h b/test/Modules/Inputs/DebugModule.h
new file mode 100644
index 0000000..5612b73
--- /dev/null
+++ b/test/Modules/Inputs/DebugModule.h
@@ -0,0 +1 @@
+@class F;
diff --git a/test/Modules/Inputs/ImportNameInDir.h b/test/Modules/Inputs/ImportNameInDir.h
new file mode 100644
index 0000000..ae7b1a0
--- /dev/null
+++ b/test/Modules/Inputs/ImportNameInDir.h
@@ -0,0 +1,4 @@
+#import <NameInDir/NameInDir.h>
+
+// Don't crash.
+#undef NAME_IN_DIR
diff --git a/test/Modules/Inputs/NameInDir.framework/Headers/NameInDir.h b/test/Modules/Inputs/NameInDir.framework/Headers/NameInDir.h
new file mode 100644
index 0000000..bea2391
--- /dev/null
+++ b/test/Modules/Inputs/NameInDir.framework/Headers/NameInDir.h
@@ -0,0 +1,2 @@
+// NameInDir.h
+#define NAME_IN_DIR 1
diff --git a/test/Modules/Inputs/NameInDir.framework/Modules/module.modulemap b/test/Modules/Inputs/NameInDir.framework/Modules/module.modulemap
new file mode 100644
index 0000000..48e1c56
--- /dev/null
+++ b/test/Modules/Inputs/NameInDir.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module NameInModMap {
+  umbrella header "NameInDir.h"
+  export *
+  module * { export * }
+}
diff --git a/test/Modules/Inputs/NameInDir2.framework/Headers/NameInDir2.h b/test/Modules/Inputs/NameInDir2.framework/Headers/NameInDir2.h
new file mode 100644
index 0000000..6dc3eea
--- /dev/null
+++ b/test/Modules/Inputs/NameInDir2.framework/Headers/NameInDir2.h
@@ -0,0 +1 @@
+// NameInDir2.h
diff --git a/test/Modules/Inputs/NameInDir2.framework/Modules/module.modulemap b/test/Modules/Inputs/NameInDir2.framework/Modules/module.modulemap
new file mode 100644
index 0000000..24f15f8
--- /dev/null
+++ b/test/Modules/Inputs/NameInDir2.framework/Modules/module.modulemap
@@ -0,0 +1,5 @@
+framework module NameInDir2 {
+  umbrella header "NameInDir2.h"
+  export *
+  module * { export * }
+}
diff --git a/test/Modules/Inputs/NameInDirInferred.framework/Headers/NameInDirInferred.h b/test/Modules/Inputs/NameInDirInferred.framework/Headers/NameInDirInferred.h
new file mode 100644
index 0000000..c0b12e6
--- /dev/null
+++ b/test/Modules/Inputs/NameInDirInferred.framework/Headers/NameInDirInferred.h
@@ -0,0 +1 @@
+// NameInDirInferred.h
diff --git a/test/Modules/Inputs/crash.h b/test/Modules/Inputs/crash.h
new file mode 100644
index 0000000..bc878fb
--- /dev/null
+++ b/test/Modules/Inputs/crash.h
@@ -0,0 +1 @@
+#pragma clang __debug crash
diff --git a/test/Modules/Inputs/explicit-build-prefer-self/a.h b/test/Modules/Inputs/explicit-build-prefer-self/a.h
new file mode 100644
index 0000000..d457612
--- /dev/null
+++ b/test/Modules/Inputs/explicit-build-prefer-self/a.h
@@ -0,0 +1,2 @@
+// a
+#include "x.h"
diff --git a/test/Modules/Inputs/explicit-build-prefer-self/b.h b/test/Modules/Inputs/explicit-build-prefer-self/b.h
new file mode 100644
index 0000000..76e2042
--- /dev/null
+++ b/test/Modules/Inputs/explicit-build-prefer-self/b.h
@@ -0,0 +1,2 @@
+// b
+#include "x.h"
diff --git a/test/Modules/Inputs/explicit-build-prefer-self/map b/test/Modules/Inputs/explicit-build-prefer-self/map
new file mode 100644
index 0000000..26be8e6
--- /dev/null
+++ b/test/Modules/Inputs/explicit-build-prefer-self/map
@@ -0,0 +1,2 @@
+module a { header "a.h" header "x.h" }
+module b { header "b.h" header "x.h" }
diff --git a/test/Modules/Inputs/explicit-build-prefer-self/x.h b/test/Modules/Inputs/explicit-build-prefer-self/x.h
new file mode 100644
index 0000000..e69de29
diff --git a/test/Modules/Inputs/merge-class-definition-visibility/b.h b/test/Modules/Inputs/merge-class-definition-visibility/b.h
index 2b8f5f8..03c0ad9 100644
--- a/test/Modules/Inputs/merge-class-definition-visibility/b.h
+++ b/test/Modules/Inputs/merge-class-definition-visibility/b.h
@@ -1,2 +1,4 @@
 // Include definition of A into the same module as c.h
 #include "a.h"
+
+struct B {};
diff --git a/test/Modules/Inputs/merge-class-definition-visibility/d.h b/test/Modules/Inputs/merge-class-definition-visibility/d.h
index 2243de1..c51edab 100644
--- a/test/Modules/Inputs/merge-class-definition-visibility/d.h
+++ b/test/Modules/Inputs/merge-class-definition-visibility/d.h
@@ -1 +1 @@
-#include "a.h"
+struct B {};
diff --git a/test/Modules/Inputs/merge-class-definition-visibility/e.h b/test/Modules/Inputs/merge-class-definition-visibility/e.h
new file mode 100644
index 0000000..f126b50
--- /dev/null
+++ b/test/Modules/Inputs/merge-class-definition-visibility/e.h
@@ -0,0 +1,3 @@
+#include "a.h"
+
+struct B {};
diff --git a/test/Modules/Inputs/merge-class-definition-visibility/modmap b/test/Modules/Inputs/merge-class-definition-visibility/modmap
index 7d988fb..dcb6587 100644
--- a/test/Modules/Inputs/merge-class-definition-visibility/modmap
+++ b/test/Modules/Inputs/merge-class-definition-visibility/modmap
@@ -3,5 +3,6 @@ module Def1 {
   module C { header "c.h" }
 }
 module Def2 {
-  header "d.h"
+  module D { header "d.h" }
+  module E { header "e.h" }
 }
diff --git a/test/Modules/Inputs/module.map b/test/Modules/Inputs/module.map
index 8ec3e21..ffaa53e 100644
--- a/test/Modules/Inputs/module.map
+++ b/test/Modules/Inputs/module.map
@@ -324,3 +324,15 @@ module recursive1 {
 module recursive2 {
   header "recursive2.h"
 }
+module crash {
+  header "crash.h"
+}
+
+module DebugModule {
+  header "DebugModule.h"
+}
+
+module ImportNameInDir {
+  header "ImportNameInDir.h"
+  export *
+}
diff --git a/test/Modules/Inputs/submodule-visibility/a.h b/test/Modules/Inputs/submodule-visibility/a.h
index d8805c9..e4965d7 100644
--- a/test/Modules/Inputs/submodule-visibility/a.h
+++ b/test/Modules/Inputs/submodule-visibility/a.h
@@ -1 +1,9 @@
 int n;
+
+#ifdef B
+#error B is defined
+#endif
+
+#define A
+
+#include "c.h"
diff --git a/test/Modules/Inputs/submodule-visibility/b.h b/test/Modules/Inputs/submodule-visibility/b.h
index fa419c0..67ef652 100644
--- a/test/Modules/Inputs/submodule-visibility/b.h
+++ b/test/Modules/Inputs/submodule-visibility/b.h
@@ -1 +1,10 @@
 int m = n;
+
+#include "other.h"
+#include "c.h"
+
+#if defined(A) && !defined(ALLOW_NAME_LEAKAGE)
+#error A is defined
+#endif
+
+#define B
diff --git a/test/Modules/Inputs/submodule-visibility/c.h b/test/Modules/Inputs/submodule-visibility/c.h
new file mode 100644
index 0000000..259b8c7
--- /dev/null
+++ b/test/Modules/Inputs/submodule-visibility/c.h
@@ -0,0 +1,6 @@
+#ifndef C_H_INCLUDED
+#define C_H_INCLUDED
+
+struct C {};
+
+#endif
diff --git a/test/Modules/Inputs/submodule-visibility/module.modulemap b/test/Modules/Inputs/submodule-visibility/module.modulemap
index 2e13344..d2f0c77 100644
--- a/test/Modules/Inputs/submodule-visibility/module.modulemap
+++ b/test/Modules/Inputs/submodule-visibility/module.modulemap
@@ -1,4 +1,5 @@
 module x { module a { header "a.h" } module b { header "b.h" } }
+module other { header "other.h" }
 
 module cycles {
   module cycle1 { header "cycle1.h" }
diff --git a/test/Modules/Inputs/submodule-visibility/other.h b/test/Modules/Inputs/submodule-visibility/other.h
new file mode 100644
index 0000000..f40c757
--- /dev/null
+++ b/test/Modules/Inputs/submodule-visibility/other.h
@@ -0,0 +1 @@
+#include "c.h"
diff --git a/test/Modules/Inputs/submodules-merge-defs/defs.h b/test/Modules/Inputs/submodules-merge-defs/defs.h
index 247b05c..bda0567 100644
--- a/test/Modules/Inputs/submodules-merge-defs/defs.h
+++ b/test/Modules/Inputs/submodules-merge-defs/defs.h
@@ -10,6 +10,7 @@ public:
 // Check that lookup and access checks are performed in the right context.
 struct B::Inner2 : Inner1 {};
 template<typename T> void B::f() {}
+template<> inline void B::f<int>() {}
 
 // Check that base-specifiers are correctly disambiguated.
 template<int N> struct C_Base { struct D { constexpr operator int() const { return 0; } }; };
@@ -31,7 +32,8 @@ template<typename T> struct F {
 template<typename T> int F<T>::f() { return 0; }
 template<typename T> template<typename U> int F<T>::g() { return 0; }
 template<typename T> int F<T>::n = 0;
-//template<> template<typename U> int F<char>::g() { return 0; } // FIXME: Re-enable this once we support merging member specializations.
+template<> inline int F<char>::f() { return 0; }
+template<> template<typename U> int F<char>::g() { return 0; }
 template<> struct F<void> { int h(); };
 inline int F<void>::h() { return 0; }
 template<typename T> struct F<T *> { int i(); };
@@ -74,3 +76,21 @@ namespace FriendDefArg {
     template<typename, int, template<typename> class> friend struct D;
   };
 }
+
+namespace SeparateInline {
+  inline void f();
+  void f() {}
+  constexpr int g() { return 0; }
+}
+
+namespace TrailingAttributes {
+  template<typename T> struct X {} __attribute__((aligned(8)));
+}
+
+namespace MergeFunctionTemplateSpecializations {
+  template<typename T> T f();
+  template<typename T> struct X {
+    template<typename U> using Q = decltype(f<T>() + U());
+  };
+  using xiq = X<int>::Q<int>;
+}
diff --git a/test/Modules/cxx-irgen.cpp b/test/Modules/cxx-irgen.cpp
index 37de23f..7cdb0d6 100644
--- a/test/Modules/cxx-irgen.cpp
+++ b/test/Modules/cxx-irgen.cpp
@@ -12,7 +12,7 @@ CtorInit<int> x;
 
 // Keep these two namespace definitions separate; merging them hides the bug.
 namespace EmitInlineMethods {
-  // CHECK-DAG: define linkonce_odr [[CC:(x86_thiscallcc[ ]+)?]]void @_ZN17EmitInlineMethods1C1fEPNS_1AE(
+  // CHECK-DAG: define linkonce_odr [[CC:([0-9_a-z]*cc[ ]+)?]]void @_ZN17EmitInlineMethods1C1fEPNS_1AE(
   // CHECK-DAG: declare [[CC]]void @_ZN17EmitInlineMethods1A1gEv(
   struct C {
     __attribute__((used)) void f(A *p) { p->g(); }
@@ -26,14 +26,14 @@ namespace EmitInlineMethods {
   };
 }
 
-// CHECK-DAG: define available_externally hidden {{signext i32|i32}} @_ZN1SIiE1gEv({{.*}} #[[ALWAYS_INLINE:.*]] align
+// CHECK-DAG: define available_externally hidden {{.*}}{{signext i32|i32}} @_ZN1SIiE1gEv({{.*}} #[[ALWAYS_INLINE:.*]] align
 int a = S<int>::g();
 
 int b = h();
 
-// CHECK-DAG: define linkonce_odr {{signext i32|i32}} @_Z3minIiET_S0_S0_(i32
+// CHECK-DAG: define linkonce_odr {{.*}}{{signext i32|i32}} @_Z3minIiET_S0_S0_(i32
 int c = min(1, 2);
-// CHECK: define available_externally {{signext i32|i32}} @_ZN1SIiE1fEv({{.*}} #[[ALWAYS_INLINE]] align
+// CHECK: define available_externally {{.*}}{{signext i32|i32}} @_ZN1SIiE1fEv({{.*}} #[[ALWAYS_INLINE]] align
 
 namespace ImplicitSpecialMembers {
   // CHECK-LABEL: define {{.*}} @_ZN22ImplicitSpecialMembers1BC2ERKS0_(
@@ -49,9 +49,9 @@ namespace ImplicitSpecialMembers {
   // CHECK-LABEL: define {{.*}} @_ZN22ImplicitSpecialMembers1DC2EOS0_(
   // CHECK: call {{.*}} @_ZN22ImplicitSpecialMembers1AC1ERKS0_(
   // CHECK-LABEL: define {{.*}} @_ZN20OperatorDeleteLookup1AD0Ev(
-  // CHECK: call void @_ZN20OperatorDeleteLookup1AdlEPv(
+  // CHECK: call {{.*}}void @_ZN20OperatorDeleteLookup1AdlEPv(
 
-  // CHECK-DAG: call {{[a-z]*[ ]?i32}} @_ZN8CtorInitIiE1fEv(
+  // CHECK-DAG: call {{[a-z\_\d]*[ ]?i32}} @_ZN8CtorInitIiE1fEv(
 
   extern B b1;
   B b2(b1);
diff --git a/test/Modules/debug-info-moduleimport.m b/test/Modules/debug-info-moduleimport.m
new file mode 100644
index 0000000..1f12b02
--- /dev/null
+++ b/test/Modules/debug-info-moduleimport.m
@@ -0,0 +1,7 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -g -fmodules -DGREETING="Hello World" -UNDEBUG -fimplicit-module-maps -fmodules-cache-path=%t %s -I %S/Inputs -isysroot /tmp/.. -I %t -emit-llvm -o - | FileCheck %s
+
+// CHECK: ![[CU:.*]] = distinct !DICompileUnit
+@import DebugModule;
+// CHECK: !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: ![[CU]], entity: ![[MODULE:.*]], line: 5)
+// CHECK: ![[MODULE]] = !DIModule(scope: null, name: "DebugModule", configMacros: "\22-DGREETING=Hello World\22 \22-UNDEBUG\22", includePath: "{{.*}}test{{.*}}Modules{{.*}}Inputs", isysroot: "/tmp/..")
diff --git a/test/Modules/direct-module-import.m b/test/Modules/direct-module-import.m
index 3216eb9..bf9248e 100644
--- a/test/Modules/direct-module-import.m
+++ b/test/Modules/direct-module-import.m
@@ -1,7 +1,7 @@
 // RUN: rm -rf %t
 // RUN: %clang_cc1 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -F %S/Inputs -include Module/Module.h %s -emit-llvm -o - | FileCheck %s
 
-// CHECK: call i8* @getModuleVersion
+// CHECK: call {{.*}}i8* @getModuleVersion
 const char* getVer(void) {
   return getModuleVersion();
 }
diff --git a/test/Modules/explicit-build-prefer-self.cpp b/test/Modules/explicit-build-prefer-self.cpp
new file mode 100644
index 0000000..13fbdbd
--- /dev/null
+++ b/test/Modules/explicit-build-prefer-self.cpp
@@ -0,0 +1,3 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -fno-implicit-modules -emit-module -fmodule-name=a %S/Inputs/explicit-build-prefer-self/map -o %t/a.pcm
+// RUN: %clang_cc1 -fmodules -fno-implicit-modules -emit-module -fmodule-name=b %S/Inputs/explicit-build-prefer-self/map -o %t/b.pcm
diff --git a/test/Modules/framework-name.m b/test/Modules/framework-name.m
new file mode 100644
index 0000000..a63e206
--- /dev/null
+++ b/test/Modules/framework-name.m
@@ -0,0 +1,33 @@
+// REQUIRES: shell
+// RUN: rm -rf %t.mcp %t
+// RUN: mkdir -p %t
+// RUN: ln -s %S/Inputs/NameInDir2.framework %t/NameInImport.framework
+// RUN: ln -s %S/Inputs/NameInDirInferred.framework %t/NameInImportInferred.framework
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t.mcp -fimplicit-module-maps -I %S/Inputs -F %S/Inputs -F %t -Wauto-import -verify %s
+
+// Sanity check that we won't somehow find non-canonical module names or
+// modules where we shouldn't search the framework.
+// RUN: echo '@import NameInModMap' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
+// RUN: echo '@import NameInDir' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
+// RUN: echo '@import NameInImport' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
+// RUN: echo '@import NameInImportInferred' | not %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -F %S/Inputs -F %t -Wauto-import -x objective-c - 2>&1 | FileCheck %s
+// CHECK: module '{{.*}}' not found
+
+// FIXME: We might want to someday lock down framework modules so that these
+// name mismatches are disallowed. However, as long as we *don't* prevent them
+// it's important that they map correctly to module imports.
+
+// The module map name doesn't match the directory name.
+#import <NameInDir/NameInDir.h> // expected-warning {{import of module 'NameInModMap'}}
+
+// The name in the import doesn't match the module name.
+#import <NameInImport/NameInDir2.h> // expected-warning {{import of module 'NameInDir2'}}
+@import NameInDir2;                 // OK
+
+// The name in the import doesn't match the module name (inferred framework module).
+#import <NameInImportInferred/NameInDirInferred.h> // expected-warning {{import of module 'NameInDirInferred'}}
+
+@import ImportNameInDir;
+#ifdef NAME_IN_DIR
+#error NAME_IN_DIR should be undef'd
+#endif
diff --git a/test/Modules/merge-class-definition-visibility.cpp b/test/Modules/merge-class-definition-visibility.cpp
index e8602c0..ac4c951 100644
--- a/test/Modules/merge-class-definition-visibility.cpp
+++ b/test/Modules/merge-class-definition-visibility.cpp
@@ -1,15 +1,23 @@
 // RUN: rm -rf %t
 // RUN: %clang_cc1 -fmodules -fmodule-map-file=%S/Inputs/merge-class-definition-visibility/modmap \
 // RUN:            -I%S/Inputs/merge-class-definition-visibility \
-// RUN:            -fmodules-cache-path=%t %s -verify
+// RUN:            -fmodules-cache-path=%t %s -verify \
+// RUN:            -fmodules-local-submodule-visibility
 // expected-no-diagnostics
 
 #include "c.h"
 template<typename T> struct X { T t; };
 typedef X<A> XA;
+struct B;
 
-#include "d.h"
-// Ensure that this triggers the import of the second definition from d.h,
+#include "e.h"
+// Ensure that this triggers the import of the second definition from e.h,
 // which is necessary to make the definition of A visible in the template
 // instantiation.
 XA xa;
+
+// Ensure that we make the definition of B visible. We made the parse-merged
+// definition from e.h visible, which makes the definition from d.h visible,
+// and that definition was merged into the canonical definition from b.h,
+// so that becomes visible, and we have a visible definition.
+B b;
diff --git a/test/Modules/module-feature.m b/test/Modules/module-feature.m
new file mode 100644
index 0000000..4926d26
--- /dev/null
+++ b/test/Modules/module-feature.m
@@ -0,0 +1,14 @@
+// RUN: rm -rf %t %t.nohash
+
+// Each set of features gets its own cache.
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t -fimplicit-module-maps -fmodule-feature f1 -fmodule-feature f2 -F %S/Inputs %s -verify -Rmodule-build
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t -fimplicit-module-maps -fmodule-feature f2 -F %S/Inputs %s -verify -Rmodule-build
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t -fimplicit-module-maps -fmodule-feature f2 -fmodule-feature f1 -F %S/Inputs %s -Rmodule-build 2>&1 | FileCheck %s -allow-empty -check-prefix=ALREADY_BUILT
+// ALREADY_BUILT-NOT: building module
+
+// Errors if we try to force the load.
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t.nohash -fimplicit-module-maps -fdisable-module-hash -fmodule-feature f1 -fmodule-feature f2 -F %S/Inputs %s -verify -Rmodule-build
+// RUN: not %clang_cc1 -fmodules -fmodules-cache-path=%t.nohash -fimplicit-module-maps -fdisable-module-hash -fmodule-feature f2 -F %S/Inputs %s 2>&1 | FileCheck %s -check-prefix=DIFFERS
+// DIFFERS: error: module features differs
+
+@import Module; // expected-remark {{building module 'Module'}} expected-remark {{finished}}
diff --git a/test/Modules/module_file_info.m b/test/Modules/module_file_info.m
index 01d5073..8693d2b 100644
--- a/test/Modules/module_file_info.m
+++ b/test/Modules/module_file_info.m
@@ -2,7 +2,7 @@
 @import DependsOnModule;
 
 // RUN: rm -rf %t
-// RUN: %clang_cc1 -w -Wunused -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t -F %S/Inputs -DBLARG -DWIBBLE=WOBBLE %s
+// RUN: %clang_cc1 -w -Wunused -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t -F %S/Inputs -DBLARG -DWIBBLE=WOBBLE -fmodule-feature myfeature %s
 // RUN: %clang_cc1 -module-file-info %t/DependsOnModule.pcm | FileCheck %s
 
 // CHECK: Generated by this Clang:
@@ -14,6 +14,8 @@
 // CHECK:   C99: Yes
 // CHECK:   Objective-C 1: Yes
 // CHECK:   modules extension to C: Yes
+// CHECK:   Module features:
+// CHECK:     myfeature
 
 // CHECK: Target options:
 // CHECK:     Triple:
diff --git a/test/Modules/modules-with-same-name.m b/test/Modules/modules-with-same-name.m
index b5cb6fa..b6925df 100644
--- a/test/Modules/modules-with-same-name.m
+++ b/test/Modules/modules-with-same-name.m
@@ -7,19 +7,19 @@
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodules-ignore-macro=EXPECTED_PATH -fmodules-ignore-macro=DIRECT -fsyntax-only %s -verify -I %S/Inputs/modules-with-same-name/path2/A -DDIRECT -DEXPECTED_PATH=2
 
 // Confirm that we have two pcm files (one for each 'A').
-// RUN: find %t -name "A-*.pcm" | count 2
+// RUN: find %t -name "A-*.pc[m]" | count 2
 
 // DependsOnA, using A from path 1
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodules-ignore-macro=EXPECTED_PATH -fmodules-ignore-macro=DIRECT -fsyntax-only %s -verify -I %S/Inputs/modules-with-same-name/DependsOnA -I %S/Inputs/modules-with-same-name/path1/A -DEXPECTED_PATH=1
 
 // Confirm that we have three pcm files (one for each 'A', and one for 'DependsOnA')
-// RUN: find %t -name "*.pcm" | count 3
+// RUN: find %t -name "*.pc[m]" | count 3
 
 // DependsOnA, using A from path 2
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -fmodules-ignore-macro=EXPECTED_PATH -fmodules-ignore-macro=DIRECT -fsyntax-only %s -verify -I %S/Inputs/modules-with-same-name/DependsOnA -I %S/Inputs/modules-with-same-name/path2/A -DEXPECTED_PATH=2
 
 // Confirm that we still have three pcm files, since DependsOnA will be rebuilt
-// RUN: find %t -name "*.pcm" | count 3
+// RUN: find %t -name "*.pc[m]" | count 3
 
 #ifdef DIRECT
 @import A;
diff --git a/test/Modules/pch-used.m b/test/Modules/pch-used.m
index cdfadb2..0711d13 100644
--- a/test/Modules/pch-used.m
+++ b/test/Modules/pch-used.m
@@ -6,4 +6,4 @@
 void f() { SPXTrace(); }
 void g() { double x = DBL_MAX; }
 
-// CHECK: define internal void @SPXTrace
+// CHECK: define internal {{.*}}void @SPXTrace
diff --git a/test/Modules/signal.m b/test/Modules/signal.m
new file mode 100644
index 0000000..30059e9
--- /dev/null
+++ b/test/Modules/signal.m
@@ -0,0 +1,11 @@
+// REQUIRES: shell
+// RUN: rm -rf %t
+
+// Crash building module.
+// RUN: not --crash %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -I%S/Inputs %s
+
+// The dead symlink is still around, but the underlying lock file is gone.
+// RUN: find %t -name "crash-*.pcm.lock" | count 1
+// RUN: find %t -name "crash-*.pcm.lock-*" | count 0
+
+@import crash;
diff --git a/test/Modules/submodule-visibility.cpp b/test/Modules/submodule-visibility.cpp
index 084f811..b2c5fc7 100644
--- a/test/Modules/submodule-visibility.cpp
+++ b/test/Modules/submodule-visibility.cpp
@@ -20,3 +20,11 @@
 #endif
 
 int k = n + m; // OK, a and b are visible here.
+
+#ifndef A
+#error A is not defined
+#endif
+
+#ifndef B
+#error B is not defined
+#endif
diff --git a/test/Modules/submodules-merge-defs.cpp b/test/Modules/submodules-merge-defs.cpp
index 94c0fd3..92c7844 100644
--- a/test/Modules/submodules-merge-defs.cpp
+++ b/test/Modules/submodules-merge-defs.cpp
@@ -3,7 +3,7 @@
 // RUN: %clang_cc1 -x c++ -std=c++11 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/submodules-merge-defs %s -verify -fno-modules-error-recovery
 // RUN: %clang_cc1 -x c++ -std=c++11 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/submodules-merge-defs %s -verify -fno-modules-error-recovery -fmodules-local-submodule-visibility -DTEXTUAL
 // RUN: %clang_cc1 -x c++ -std=c++11 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/submodules-merge-defs %s -verify -fno-modules-error-recovery -fmodules-local-submodule-visibility
-// RUN: %clang_cc1 -x c++ -std=c++11 -fmodules-cache-path=%t -fimplicit-module-maps -I %S/Inputs/submodules-merge-defs %s -verify -fno-modules-error-recovery -fmodules-local-submodule-visibility -DTEXTUAL -DEARLY_INDIRECT_INCLUDE
+// RUN: %clang_cc1 -x c++ -std=c++11 -fmodules-cache-path=%t -fimplicit-module-maps -I %S/Inputs/submodules-merge-defs %s -verify -fno-modules-error-recovery -fmodules-local-submodule-visibility -DTEXTUAL -DEARLY_INDIRECT_INCLUDE -fno-modules-hide-internal-linkage
 // RUN: %clang_cc1 -x c++ -std=c++11 -fmodules-cache-path=%t -fmodules -fimplicit-module-maps -I %S/Inputs/submodules-merge-defs %s -verify -fno-modules-error-recovery -fmodules-local-submodule-visibility -fmodule-feature use_defs_twice -DIMPORT_USE_2
 
 // Trigger import of definitions, but don't make them visible.
@@ -27,33 +27,37 @@ int pre_use_a = use_a(pre_a); // expected-error {{'A' must be imported}} expecte
 B::Inner2 pre_bi; // expected-error +{{must be imported}}
 // expected-note@defs.h:4 +{{here}}
 // expected-note@defs.h:11 +{{here}}
+void pre_bfi(B b) { // expected-error {{must use 'class'}} expected-error +{{must be imported}}
+  b.f<int>(); // expected-error +{{must be imported}} expected-error +{{}}
+  // expected-note@defs.h:12 +{{here}}
+}
 
 C_Base<1> pre_cb1; // expected-error +{{must be imported}}
-// expected-note@defs.h:15 +{{here}}
+// expected-note@defs.h:16 +{{here}}
 C1 pre_c1; // expected-error +{{must be imported}} expected-error {{must use 'struct'}}
-// expected-note@defs.h:17 +{{here}}
-C2 pre_c2; // expected-error +{{must be imported}} expected-error {{must use 'struct'}}
 // expected-note@defs.h:18 +{{here}}
+C2 pre_c2; // expected-error +{{must be imported}} expected-error {{must use 'struct'}}
+// expected-note@defs.h:19 +{{here}}
 
 D::X pre_dx; // expected-error +{{must be imported}}
-// expected-note@defs.h:20 +{{here}}
 // expected-note@defs.h:21 +{{here}}
+// expected-note@defs.h:22 +{{here}}
 // FIXME: We should warn that use_dx is being used without being imported.
 int pre_use_dx = use_dx(pre_dx);
 
 int pre_e = E(0); // expected-error {{must be imported}}
-// expected-note@defs.h:24 +{{here}}
+// expected-note@defs.h:25 +{{here}}
 
 int pre_ff = F<int>().f(); // expected-error +{{must be imported}}
 int pre_fg = F<int>().g<int>(); // expected-error +{{must be imported}}
-// expected-note@defs.h:26 +{{here}}
+// expected-note@defs.h:27 +{{here}}
 
 G::A pre_ga // expected-error +{{must be imported}}
   = G::a; // expected-error +{{must be imported}}
-// expected-note@defs.h:40 +{{here}}
-// expected-note@defs.h:41 +{{here}}
-decltype(G::h) pre_gh = G::h; // expected-error +{{must be imported}}
 // expected-note@defs.h:42 +{{here}}
+// expected-note@defs.h:43 +{{here}}
+decltype(G::h) pre_gh = G::h; // expected-error +{{must be imported}}
+// expected-note@defs.h:44 +{{here}}
 
 J<> pre_j; // expected-error {{declaration of 'J' must be imported}}
 #ifdef IMPORT_USE_2
@@ -63,7 +67,7 @@ J<> pre_j; // expected-error {{declaration of 'J' must be imported}}
 #else
 // expected-error@-6 {{default argument of 'J' must be imported from module 'stuff.use'}}
 #endif
-// expected-note@defs.h:49 +{{here}}
+// expected-note@defs.h:51 +{{here}}
 
 // Make definitions from second module visible.
 #ifdef TEXTUAL
@@ -77,6 +81,9 @@ J<> pre_j; // expected-error {{declaration of 'J' must be imported}}
 A post_a;
 int post_use_a = use_a(post_a);
 B::Inner2 post_bi;
+void post_bfi(B b) {
+  b.f<int>();
+}
 C_Base<1> post_cb1;
 C1 c1;
 C2 c2;
@@ -92,3 +99,10 @@ template<typename T, int N, template<typename> class K> struct J;
 J<> post_j2;
 FriendDefArg::Y<int> friend_def_arg;
 FriendDefArg::D<> friend_def_arg_d;
+
+MergeFunctionTemplateSpecializations::X<int>::Q<char> xiqc;
+
+#ifdef TEXTUAL
+#include "use-defs.h"
+void use_static_inline() { StaticInline::g({}); }
+#endif
diff --git a/test/OpenMP/barrier_codegen.cpp b/test/OpenMP/barrier_codegen.cpp
index 12d4c44..9e393ac 100644
--- a/test/OpenMP/barrier_codegen.cpp
+++ b/test/OpenMP/barrier_codegen.cpp
@@ -24,7 +24,7 @@ int main(int argc, char **argv) {
   static int a;
 #pragma omp barrier
   // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* [[LOC]])
-  // CHECK: call i32 @__kmpc_cancel_barrier([[IDENT_T]]* [[EXPLICIT_BARRIER_LOC]], i32 [[GTID]])
+  // CHECK: call void @__kmpc_barrier([[IDENT_T]]* [[EXPLICIT_BARRIER_LOC]], i32 [[GTID]])
   // CHECK: call {{.+}} [[TMAIN_INT:@.+]](i{{[0-9][0-9]}}
   // CHECK: call {{.+}} [[TMAIN_CHAR:@.+]](i{{[0-9]}}
   return tmain(argc) + tmain(argv[0][0]) + a;
@@ -32,10 +32,10 @@ int main(int argc, char **argv) {
 
 // CHECK: define {{.+}} [[TMAIN_INT]](
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* [[LOC]])
-// CHECK: call i32 @__kmpc_cancel_barrier([[IDENT_T]]* [[EXPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call void @__kmpc_barrier([[IDENT_T]]* [[EXPLICIT_BARRIER_LOC]], i32 [[GTID]])
 
 // CHECK: define {{.+}} [[TMAIN_CHAR]](
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* [[LOC]])
-// CHECK: call i32 @__kmpc_cancel_barrier([[IDENT_T]]* [[EXPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call void @__kmpc_barrier([[IDENT_T]]* [[EXPLICIT_BARRIER_LOC]], i32 [[GTID]])
 
 #endif
diff --git a/test/OpenMP/cancel_ast_print.cpp b/test/OpenMP/cancel_ast_print.cpp
new file mode 100644
index 0000000..f244c78
--- /dev/null
+++ b/test/OpenMP/cancel_ast_print.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int main (int argc, char **argv) {
+// CHECK: int main(int argc, char **argv) {
+#pragma omp parallel
+{
+#pragma omp cancel parallel
+}
+// CHECK: #pragma omp parallel
+// CHECK-NEXT: {
+// CHECK-NEXT: #pragma omp cancel parallel
+// CHECK-NEXT: }
+#pragma omp sections
+{
+#pragma omp cancel sections
+}
+// CHECK-NEXT: #pragma omp sections
+// CHECK: {
+// CHECK: #pragma omp cancel sections
+// CHECK: }
+#pragma omp for
+for (int i = 0; i < argc; ++i) {
+#pragma omp cancel for
+}
+// CHECK: #pragma omp for
+// CHECK-NEXT: for (int i = 0; i < argc; ++i) {
+// CHECK-NEXT: #pragma omp cancel for
+// CHECK-NEXT: }
+#pragma omp task
+{
+#pragma omp cancel taskgroup
+}
+// CHECK: #pragma omp task
+// CHECK: {
+// CHECK: #pragma omp cancel taskgroup
+// CHECK: }
+// CHECK: return argc;
+  return argc;
+}
+
+#endif
diff --git a/test/OpenMP/cancel_messages.cpp b/test/OpenMP/cancel_messages.cpp
new file mode 100644
index 0000000..0708838
--- /dev/null
+++ b/test/OpenMP/cancel_messages.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+
+int main(int argc, char **argv) {
+#pragma omp cancellation       // expected-error {{expected an OpenMP directive}}
+#pragma omp cancel // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}}
+  ;
+#pragma omp cancel parallel untied // expected-error {{unexpected OpenMP clause 'untied' in directive '#pragma omp cancel'}}
+#pragma omp cancel unknown         // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}}
+#pragma omp cancel sections(       // expected-warning {{extra tokens at the end of '#pragma omp cancel' are ignored}}
+#pragma omp cancel for, )          // expected-warning {{extra tokens at the end of '#pragma omp cancel' are ignored}}
+#pragma omp cancel taskgroup()     // expected-warning {{extra tokens at the end of '#pragma omp cancel' are ignored}}
+#pragma omp cancel parallel, if    // expected-warning {{extra tokens at the end of '#pragma omp cancel' are ignored}}
+  if (argc)
+#pragma omp cancel for // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+    if (argc) {
+#pragma omp taskgroup
+#pragma omp task
+#pragma omp parallel
+      {
+#pragma omp cancel taskgroup // expected-error {{region cannot be closely nested inside 'parallel' region}}
+      }
+    }
+#pragma omp parallel
+#pragma omp taskgroup
+  {
+#pragma omp cancel taskgroup // expected-error {{region cannot be closely nested inside 'taskgroup' region}}
+  }
+#pragma omp parallel
+  {
+#pragma omp cancel for // expected-error {{region cannot be closely nested inside 'parallel' region}}
+  }
+#pragma omp task
+  {
+#pragma omp cancel sections // expected-error {{region cannot be closely nested inside 'task' region}}
+  }
+#pragma omp sections
+  {
+#pragma omp cancel parallel // expected-error {{region cannot be closely nested inside 'sections' region}}
+  }
+  while (argc)
+#pragma omp cancel for // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+    while (argc) {
+#pragma omp cancel sections
+    }
+  do
+#pragma omp cancel parallel // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+    while (argc)
+      ;
+  do {
+#pragma omp cancel taskgroup
+  } while (argc);
+  switch (argc)
+#pragma omp cancel parallel // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+    switch (argc)
+    case 1:
+#pragma omp cancel sections // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+  switch (argc)
+  case 1: {
+#pragma omp cancel for
+  }
+  switch (argc) {
+#pragma omp cancel taskgroup
+  case 1:
+#pragma omp cancel parallel // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+    break;
+  default: {
+#pragma omp cancel sections
+  } break;
+  }
+  for (;;)
+#pragma omp cancel for // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+    for (;;) {
+#pragma omp cancel taskgroup
+    }
+label:
+#pragma omp cancel parallel // expected-error {{'#pragma omp cancel' cannot be an immediate substatement}}
+label1 : {
+#pragma omp cancel sections
+}
+
+  return 0;
+}
+
diff --git a/test/OpenMP/cancellation_point_ast_print.cpp b/test/OpenMP/cancellation_point_ast_print.cpp
new file mode 100644
index 0000000..c8b133c
--- /dev/null
+++ b/test/OpenMP/cancellation_point_ast_print.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int main (int argc, char **argv) {
+// CHECK: int main(int argc, char **argv) {
+#pragma omp parallel
+{
+#pragma omp cancellation point parallel
+}
+// CHECK: #pragma omp parallel
+// CHECK-NEXT: {
+// CHECK-NEXT: #pragma omp cancellation point parallel
+// CHECK-NEXT: }
+#pragma omp sections
+{
+#pragma omp cancellation point sections
+}
+// CHECK-NEXT: #pragma omp sections
+// CHECK: {
+// CHECK: #pragma omp cancellation point sections
+// CHECK: }
+#pragma omp for
+for (int i = 0; i < argc; ++i) {
+#pragma omp cancellation point for
+}
+// CHECK: #pragma omp for
+// CHECK-NEXT: for (int i = 0; i < argc; ++i) {
+// CHECK-NEXT: #pragma omp cancellation point for
+// CHECK-NEXT: }
+#pragma omp task
+{
+#pragma omp cancellation point taskgroup
+}
+// CHECK: #pragma omp task
+// CHECK: {
+// CHECK: #pragma omp cancellation point taskgroup
+// CHECK: }
+// CHECK: return argc;
+  return argc;
+}
+
+#endif
diff --git a/test/OpenMP/cancellation_point_codegen.cpp b/test/OpenMP/cancellation_point_codegen.cpp
new file mode 100644
index 0000000..47903c1
--- /dev/null
+++ b/test/OpenMP/cancellation_point_codegen.cpp
@@ -0,0 +1,95 @@
+// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+int main (int argc, char **argv) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+#pragma omp parallel
+{
+#pragma omp cancellation point parallel
+  argv[0][0] = argc;
+}
+// CHECK: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+#pragma omp sections
+{
+#pragma omp cancellation point sections
+}
+// CHECK: call i32 @__kmpc_single(
+// CHECK-NOT: @__kmpc_cancellationpoint
+// CHECK: call void @__kmpc_end_single(
+// CHECK: call void @__kmpc_barrier(%ident_t*
+#pragma omp sections
+{
+#pragma omp cancellation point sections
+#pragma omp section
+  {
+#pragma omp cancellation point sections
+  }
+}
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(%ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
+// CHECK: [[EXIT]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t*
+// CHECK: br label
+// CHECK: [[CONTINUE]]
+// CHECK: br label
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(%ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
+// CHECK: [[EXIT]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t*
+// CHECK: br label
+// CHECK: [[CONTINUE]]
+// CHECK: br label
+// CHECK: call void @__kmpc_for_static_fini(
+#pragma omp for
+for (int i = 0; i < argc; ++i) {
+#pragma omp cancellation point for
+}
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(%ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
+// CHECK: [[EXIT]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t*
+// CHECK: br label
+// CHECK: [[CONTINUE]]
+// CHECK: br label
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: call void @__kmpc_barrier(%ident_t*
+#pragma omp task
+{
+#pragma omp cancellation point taskgroup
+}
+// CHECK: call i8* @__kmpc_omp_task_alloc(
+// CHECK: call i32 @__kmpc_omp_task(
+  return argc;
+}
+
+// CHECK: define internal void @{{[^(]+}}(i32* {{[^,]+}}, i32* {{[^,]+}},
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[EXIT:[^,]+]],
+// CHECK: [[EXIT]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t*
+// CHECK: br label %[[RETURN:.+]]
+// CHECK: [[RETURN]]
+// CHECK: ret void
+
+// CHECK: define internal i32 @{{[^(]+}}(i32
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancellationpoint(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 4)
+// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK: br i1 [[CMP]], label %[[EXIT:[^,]+]],
+// CHECK: [[EXIT]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t*
+// CHECK: br label %[[RETURN:.+]]
+// CHECK: [[RETURN]]
+// CHECK: ret i32 0
+
+#endif
diff --git a/test/OpenMP/cancellation_point_messages.cpp b/test/OpenMP/cancellation_point_messages.cpp
new file mode 100644
index 0000000..d25cb61
--- /dev/null
+++ b/test/OpenMP/cancellation_point_messages.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s
+
+int main(int argc, char **argv) {
+#pragma omp cancellation       // expected-error {{expected an OpenMP directive}}
+#pragma omp cancellation point // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}}
+  ;
+#pragma omp cancellation point parallel untied // expected-error {{unexpected OpenMP clause 'untied' in directive '#pragma omp cancellation point'}}
+#pragma omp cancellation point unknown         // expected-error {{one of 'for', 'parallel', 'sections' or 'taskgroup' is expected}}
+#pragma omp cancellation point sections(       // expected-warning {{extra tokens at the end of '#pragma omp cancellation point' are ignored}}
+#pragma omp cancellation point for, )          // expected-warning {{extra tokens at the end of '#pragma omp cancellation point' are ignored}}
+#pragma omp cancellation point taskgroup()     // expected-warning {{extra tokens at the end of '#pragma omp cancellation point' are ignored}}
+#pragma omp cancellation point parallel, if    // expected-warning {{extra tokens at the end of '#pragma omp cancellation point' are ignored}}
+  if (argc)
+#pragma omp cancellation point for // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+    if (argc) {
+#pragma omp taskgroup
+#pragma omp task
+#pragma omp parallel
+      {
+#pragma omp cancellation point taskgroup // expected-error {{region cannot be closely nested inside 'parallel' region}}
+      }
+    }
+#pragma omp parallel
+#pragma omp taskgroup
+  {
+#pragma omp cancellation point taskgroup // expected-error {{region cannot be closely nested inside 'taskgroup' region}}
+  }
+#pragma omp parallel
+  {
+#pragma omp cancellation point for // expected-error {{region cannot be closely nested inside 'parallel' region}}
+  }
+#pragma omp task
+  {
+#pragma omp cancellation point sections // expected-error {{region cannot be closely nested inside 'task' region}}
+  }
+#pragma omp sections
+  {
+#pragma omp cancellation point parallel // expected-error {{region cannot be closely nested inside 'sections' region}}
+  }
+  while (argc)
+#pragma omp cancellation point for // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+    while (argc) {
+#pragma omp cancellation point sections
+    }
+  do
+#pragma omp cancellation point parallel // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+    while (argc)
+      ;
+  do {
+#pragma omp cancellation point taskgroup
+  } while (argc);
+  switch (argc)
+#pragma omp cancellation point parallel // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+    switch (argc)
+    case 1:
+#pragma omp cancellation point sections // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+  switch (argc)
+  case 1: {
+#pragma omp cancellation point for
+  }
+  switch (argc) {
+#pragma omp cancellation point taskgroup
+  case 1:
+#pragma omp cancellation point parallel // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+    break;
+  default: {
+#pragma omp cancellation point sections
+  } break;
+  }
+  for (;;)
+#pragma omp cancellation point for // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+    for (;;) {
+#pragma omp cancellation point taskgroup
+    }
+label:
+#pragma omp cancellation point parallel // expected-error {{'#pragma omp cancellation point' cannot be an immediate substatement}}
+label1 : {
+#pragma omp cancellation point sections
+}
+
+  return 0;
+}
+
diff --git a/test/OpenMP/critical_codegen.cpp b/test/OpenMP/critical_codegen.cpp
index d350d6e..26f5edb 100644
--- a/test/OpenMP/critical_codegen.cpp
+++ b/test/OpenMP/critical_codegen.cpp
@@ -11,7 +11,7 @@
 // CHECK:       [[UNNAMED_LOCK:@.+]] = common global [8 x i32] zeroinitializer
 // CHECK:       [[THE_NAME_LOCK:@.+]] = common global [8 x i32] zeroinitializer
 
-// CHECK:       define void [[FOO:@.+]]()
+// CHECK:       define {{.*}}void [[FOO:@.+]]()
 
 void foo() {}
 
@@ -21,15 +21,15 @@ int main() {
 // CHECK:       [[A_ADDR:%.+]] = alloca i8
   char a;
 
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// CHECK:       call void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
+// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// CHECK:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
 // CHECK-NEXT:  store i8 2, i8* [[A_ADDR]]
-// CHECK-NEXT:  call void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
 #pragma omp critical
   a = 2;
-// CHECK:       call void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
-// CHECK-NEXT:  invoke void [[FOO]]()
-// CHECK:       call void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+// CHECK:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+// CHECK-NEXT:  invoke {{.*}}void [[FOO]]()
+// CHECK:       call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
 #pragma omp critical(the_name)
   foo();
 // CHECK-NOT:   call void @__kmpc_critical
diff --git a/test/OpenMP/flush_codegen.cpp b/test/OpenMP/flush_codegen.cpp
index 2c41b3a..4ebdf52 100644
--- a/test/OpenMP/flush_codegen.cpp
+++ b/test/OpenMP/flush_codegen.cpp
@@ -19,16 +19,16 @@ int main() {
   static int a;
 #pragma omp flush
 #pragma omp flush(a)
-  // CHECK: call void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
-  // CHECK: call void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
+  // CHECK: call {{.*}}void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
+  // CHECK: call {{.*}}void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
   return tmain(a);
   // CHECK: call {{.*}} [[TMAIN:@.+]](
   // CHECK: ret
 }
 
 // CHECK: [[TMAIN]]
-// CHECK: call void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
-// CHECK: call void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
+// CHECK: call {{.*}}void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
+// CHECK: call {{.*}}void @__kmpc_flush(%{{.+}}* {{(@|%).+}})
 // CHECK: ret
 
 #endif
diff --git a/test/OpenMP/for_codegen.cpp b/test/OpenMP/for_codegen.cpp
index 5dcacb2..30cf484 100644
--- a/test/OpenMP/for_codegen.cpp
+++ b/test/OpenMP/for_codegen.cpp
@@ -50,7 +50,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NOT: __kmpc_cancel_barrier
+// CHECK-NOT: __kmpc_barrier
 // CHECK: ret void
 }
 
@@ -91,7 +91,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -151,7 +151,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
 
 // CHECK: [[O_LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -192,7 +192,7 @@ void dynamic1(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -233,7 +233,7 @@ void guided7(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -278,7 +278,7 @@ void test_auto(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -320,7 +320,7 @@ void runtime(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
diff --git a/test/OpenMP/for_firstprivate_codegen.cpp b/test/OpenMP/for_firstprivate_codegen.cpp
index 2402b50..1ef3866 100644
--- a/test/OpenMP/for_firstprivate_codegen.cpp
+++ b/test/OpenMP/for_firstprivate_codegen.cpp
@@ -82,7 +82,7 @@ int main() {
     // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G]]
     // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
-    // LAMBDA: call i32 @__kmpc_cancel_barrier(
+    // LAMBDA: call void @__kmpc_barrier(
     g = 1;
     // LAMBDA: call void @__kmpc_for_static_init_4(
     // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@@ -123,7 +123,7 @@ int main() {
     // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G]]
     // BLOCKS: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
-    // BLOCKS: call i32 @__kmpc_cancel_barrier(
+    // BLOCKS: call void @__kmpc_barrier(
     g = 1;
     // BLOCKS: call void @__kmpc_for_static_init_4(
     // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@@ -194,7 +194,7 @@ int main() {
 // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
 
 // Synchronization for initialization.
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call void @__kmpc_for_static_fini(
@@ -202,7 +202,7 @@ int main() {
 // ~(firstprivate var), ~(firstprivate s_arr)
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 
 // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
 
@@ -264,7 +264,7 @@ int main() {
 // Synchronization for initialization.
 // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
 // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call void @__kmpc_for_static_fini(
diff --git a/test/OpenMP/for_private_codegen.cpp b/test/OpenMP/for_private_codegen.cpp
index 8fc5fe7..8172912 100644
--- a/test/OpenMP/for_private_codegen.cpp
+++ b/test/OpenMP/for_private_codegen.cpp
@@ -19,9 +19,10 @@ struct S {
 volatile double g;
 
 // CHECK: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK: [[CAP_MAIN_TY:%.+]] = type { i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]* }
+// CHECK: [[CAP_MAIN_TY:%.+]] = type { i8 }
+// CHECK: type { i8 }
 // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i8 }
 template <typename T>
 T tmain() {
   S<T> test;
@@ -42,10 +43,10 @@ int main() {
 #ifdef LAMBDA
   // LAMBDA: [[G:@.+]] = global double
   // LAMBDA-LABEL: @main
-  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
+  // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
 #pragma omp parallel
 #pragma omp for private(g)
   for (int i = 0; i < 2; ++i) {
@@ -53,12 +54,12 @@ int main() {
     // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
     // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
     g = 1;
-    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
     // LAMBDA: store volatile double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
     // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
     // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]]
-    // LAMBDA: call{{( x86_thiscallcc)?}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
-    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+    // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
     [&]() {
       // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
       // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
@@ -74,10 +75,10 @@ int main() {
 #elif defined(BLOCKS)
   // BLOCKS: [[G:@.+]] = global double
   // BLOCKS-LABEL: @main
-  // BLOCKS: call void {{%.+}}(i8
+  // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
-  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* {{.+}})
+  // BLOCKS: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* {{.+}})
 #pragma omp parallel
 #pragma omp for private(g)
   for (int i = 0; i < 2; ++i) {
@@ -85,13 +86,13 @@ int main() {
     // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca double,
     // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
     g = 1;
-    // BLOCKS: call void @__kmpc_for_static_init_4(
+    // BLOCKS: call {{.*}}void @__kmpc_for_static_init_4(
     // BLOCKS: store volatile double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
     // BLOCKS: double* [[G_PRIVATE_ADDR]]
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
-    // BLOCKS: call void {{%.+}}(i8
-    // BLOCKS: call void @__kmpc_for_static_fini(
+    // BLOCKS: call {{.*}}void {{%.+}}(i8
+    // BLOCKS: call {{.*}}void @__kmpc_for_static_fini(
     ^{
       // BLOCKS: define {{.+}} void {{@.+}}(i8*
       g = 2;
diff --git a/test/OpenMP/for_simd_codegen.cpp b/test/OpenMP/for_simd_codegen.cpp
index 00ec6cb..9361192 100644
--- a/test/OpenMP/for_simd_codegen.cpp
+++ b/test/OpenMP/for_simd_codegen.cpp
@@ -50,7 +50,7 @@ void simple(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[SIMPLE_LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   long long k = get_val();
 
@@ -101,7 +101,7 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
 // CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
 // CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_VAR]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   int lin = 12;
   #pragma omp for simd linear(lin : get_val()), linear(g_ptr)
@@ -172,7 +172,7 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: store i32 {{.+}}, i32* [[LIN_VAR]],
 // CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
 // CHECK: store double* {{.*}}[[GLIN_VAR]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   #pragma omp for simd
 // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i32* [[LB:%[^,]+]], i32* [[UB:%[^,]+]], i32* [[STRIDE:%[^,]+]], i32 1, i32 1)
@@ -209,7 +209,7 @@ void simple(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[SIMPLE_LOOP4_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   #pragma omp for simd
 // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i32* [[LB:%[^,]+]], i32* [[UB:%[^,]+]], i32* [[STRIDE:%[^,]+]], i32 1, i32 1)
@@ -246,7 +246,7 @@ void simple(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[SIMPLE_LOOP5_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
 // CHECK-NOT: mul i32 %{{.+}}, 10
   #pragma omp for simd
@@ -413,7 +413,7 @@ int templ1(T a, T *z) {
 // CHECK-NEXT: br label {{%.+}}
 // CHECK: [[T1_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 // CHECK: ret i32 0
 //
 void inst_templ1() {
@@ -505,7 +505,7 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
   }
 // CHECK: [[IT_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 // CHECK: ret void
 }
 
@@ -584,7 +584,7 @@ void collapsed(float *a, float *b, float *c, float *d) {
 // CHECK-NEXT: store i32 3, i32* [[I:%[^,]+]]
 // CHECK-NEXT: store i32 5, i32* [[I:%[^,]+]]
 // CHECK-NEXT: store i16 9, i16* [[I:%[^,]+]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 // CHECK: ret void
 }
 
diff --git a/test/OpenMP/master_codegen.cpp b/test/OpenMP/master_codegen.cpp
index 1eb47e4..e6ea21a 100644
--- a/test/OpenMP/master_codegen.cpp
+++ b/test/OpenMP/master_codegen.cpp
@@ -9,7 +9,7 @@
 
 // CHECK:       [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
 
-// CHECK:       define void [[FOO:@.+]]()
+// CHECK:       define {{.*}}void [[FOO:@.+]]()
 
 void foo() {}
 
@@ -19,23 +19,23 @@ int main() {
   // CHECK:       [[A_ADDR:%.+]] = alloca i8
   char a;
 
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// CHECK:       [[RES:%.+]] = call i32 @__kmpc_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
 // CHECK:       [[THEN]]
 // CHECK-NEXT:  store i8 2, i8* [[A_ADDR]]
-// CHECK-NEXT:  call void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT:  br label {{%?}}[[EXIT]]
 // CHECK:       [[EXIT]]
 #pragma omp master
   a = 2;
-// CHECK:       [[RES:%.+]] = call i32 @__kmpc_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
 // CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
 // CHECK:       [[THEN]]
-// CHECK-NEXT:  invoke void [[FOO]]()
-// CHECK:       call void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK-NEXT:  invoke {{.*}}void [[FOO]]()
+// CHECK:       call {{.*}}void @__kmpc_end_master([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT:  br label {{%?}}[[EXIT]]
 // CHECK:       [[EXIT]]
 #pragma omp master
diff --git a/test/OpenMP/ordered_codegen.cpp b/test/OpenMP/ordered_codegen.cpp
index adc6ff6..7684623 100644
--- a/test/OpenMP/ordered_codegen.cpp
+++ b/test/OpenMP/ordered_codegen.cpp
@@ -53,7 +53,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -104,7 +104,7 @@ void dynamic1(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -158,7 +158,7 @@ void test_auto(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -209,7 +209,7 @@ void runtime(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
diff --git a/test/OpenMP/parallel_codegen.cpp b/test/OpenMP/parallel_codegen.cpp
index 16c9c59..907d135 100644
--- a/test/OpenMP/parallel_codegen.cpp
+++ b/test/OpenMP/parallel_codegen.cpp
@@ -34,14 +34,14 @@ int main (int argc, char **argv) {
   return tmain(argv);
 }
 
-// CHECK-LABEL: define {{[a-z]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv)
+// CHECK-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv)
 // CHECK:       [[AGG_CAPTURED:%.+]] = alloca %struct.anon
 // CHECK:       [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
 // CHECK-NEXT:  store i32* {{%[a-z0-9.]+}}, i32** [[ARGC_REF]]
 // CHECK-NEXT:  [[BITCAST:%.+]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8*
-// CHECK-NEXT:  call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
+// CHECK-NEXT:  call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
 // CHECK-NEXT:  [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
-// CHECK-NEXT:  [[RET:%.+]] = call {{[a-z]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
+// CHECK-NEXT:  [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
 // CHECK-NEXT:  ret i32 [[RET]]
 // CHECK-NEXT:  }
 // CHECK-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv)
@@ -61,7 +61,7 @@ int main (int argc, char **argv) {
 // CHECK-DEBUG-NEXT:  ret i32 [[RET]]
 // CHECK-DEBUG-NEXT:  }
 
-// CHECK:       define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context)
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context)
 // CHECK:       #[[FN_ATTRS:[0-9]+]]
 // CHECK:       [[CONTEXT_ADDR:%.+]] = alloca %struct.anon*
 // CHECK:       store %struct.anon* %__context, %struct.anon** [[CONTEXT_ADDR]]
@@ -69,10 +69,10 @@ int main (int argc, char **argv) {
 // CHECK-NEXT:  [[ARGC_PTR_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CONTEXT_PTR]], i32 0, i32 0
 // CHECK-NEXT:  [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_REF]]
 // CHECK-NEXT:  [[ARGC:%.+]] = load i32, i32* [[ARGC_REF]]
-// CHECK-NEXT:  invoke void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[ARGC]])
+// CHECK-NEXT:  invoke {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[ARGC]])
 // CHECK:       call {{.+}} @__kmpc_cancel_barrier(
 // CHECK:       ret void
-// CHECK:       call void @{{.+terminate.*|abort}}(
+// CHECK:       call {{.*}}void @{{.+terminate.*|abort}}(
 // CHECK-NEXT:  unreachable
 // CHECK-NEXT:  }
 // CHECK-DEBUG:       define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context)
@@ -90,17 +90,17 @@ int main (int argc, char **argv) {
 // CHECK-DEBUG-NEXT:  unreachable
 // CHECK-DEBUG-NEXT:  }
 
-// CHECK-DAG: define linkonce_odr void [[FOO]]({{i32[ ]?[a-z]*}} %argc)
-// CHECK-DAG: declare void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...)
+// CHECK-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc)
+// CHECK-DAG: declare {{.*}}void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...)
 // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc)
 // CHECK-DEBUG-DAG: declare void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...)
 
-// CHECK:       define linkonce_odr {{[a-z]*[ ]?i32}} [[TMAIN]](i8** %argc)
+// CHECK:       define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
 // CHECK:       [[AGG_CAPTURED:%.+]] = alloca %struct.anon.0
 // CHECK:       [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0
 // CHECK-NEXT:  store i8*** {{%[a-z0-9.]+}}, i8**** [[ARGC_REF]]
 // CHECK-NEXT:  [[BITCAST:%.+]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8*
-// CHECK-NEXT:  call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
+// CHECK-NEXT:  call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]])
 // CHECK-NEXT:  ret i32 0
 // CHECK-NEXT:  }
 // CHECK-DEBUG:       define linkonce_odr i32 [[TMAIN]](i8** %argc)
@@ -118,17 +118,17 @@ int main (int argc, char **argv) {
 // CHECK-DEBUG-NEXT:  ret i32 0
 // CHECK-DEBUG-NEXT:  }
 
-// CHECK:       define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context)
+// CHECK:       define internal {{.*}}void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context)
 // CHECK:       [[CONTEXT_ADDR:%.+]] = alloca %struct.anon.0*
 // CHECK:       store %struct.anon.0* %__context, %struct.anon.0** [[CONTEXT_ADDR]]
 // CHECK:       [[CONTEXT_PTR:%.+]] = load %struct.anon.0*, %struct.anon.0** [[CONTEXT_ADDR]]
 // CHECK-NEXT:  [[ARGC_PTR_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[CONTEXT_PTR]], i32 0, i32 0
 // CHECK-NEXT:  [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_REF]]
 // CHECK-NEXT:  [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
-// CHECK-NEXT:  invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]])
+// CHECK-NEXT:  invoke {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]])
 // CHECK:       call {{.+}} @__kmpc_cancel_barrier(
 // CHECK:       ret void
-// CHECK:       call void @{{.+terminate.*|abort}}(
+// CHECK:       call {{.*}}void @{{.+terminate.*|abort}}(
 // CHECK-NEXT:  unreachable
 // CHECK-NEXT:  }
 // CHECK-DEBUG:       define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context)
@@ -145,7 +145,7 @@ int main (int argc, char **argv) {
 // CHECK-DEBUG-NEXT:  unreachable
 // CHECK-DEBUG-NEXT:  }
 
-// CHECK: define linkonce_odr void [[FOO1]](i8** %argc)
+// CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
 // CHECK-DEBUG: define linkonce_odr void [[FOO1]](i8** %argc)
 
 // CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind
diff --git a/test/OpenMP/parallel_copyin_codegen.cpp b/test/OpenMP/parallel_copyin_codegen.cpp
index e69ace7..dd0a9b6 100644
--- a/test/OpenMP/parallel_copyin_codegen.cpp
+++ b/test/OpenMP/parallel_copyin_codegen.cpp
@@ -58,16 +58,16 @@ int main() {
 #ifdef LAMBDA
   // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
   // LAMBDA-LABEL: @main
-  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8*
+  // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8*
 #pragma omp parallel copyin(g)
   {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
 
     // threadprivate_g = g;
-    // LAMBDA: call i8* @__kmpc_threadprivate_cached({{.+}} [[G]]
+    // LAMBDA: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[G]]
     // LAMBDA: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
     // LAMBDA: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[G]] to i{{[0-9]+}}), %{{.+}}
     // LAMBDA: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
@@ -76,9 +76,9 @@ int main() {
     // LAMBDA: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
     // LAMBDA: [[DONE]]
 
-    // LAMBDA: call i32 @__kmpc_cancel_barrier(
+    // LAMBDA: call {{.*}}i32 @__kmpc_cancel_barrier(
     g = 1;
-    // LAMBDA: call{{( x86_thiscallcc)?}} void [[INNER_LAMBDA:@.+]](%{{.+}}*
+    // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}*
     [&]() {
       // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
       // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
@@ -91,16 +91,16 @@ int main() {
 #elif defined(BLOCKS)
   // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
   // BLOCKS-LABEL: @main
-  // BLOCKS: call void {{%.+}}(i8
+  // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
-  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8*
+  // BLOCKS: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8*
 #pragma omp parallel copyin(g)
   {
     // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
 
     // threadprivate_g = g;
-    // BLOCKS: call i8* @__kmpc_threadprivate_cached({{.+}} [[G]]
+    // BLOCKS: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[G]]
     // BLOCKS: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
     // BLOCKS: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[G]] to i{{[0-9]+}}), %{{.+}}
     // BLOCKS: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
@@ -109,16 +109,16 @@ int main() {
     // BLOCKS: store volatile i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
     // BLOCKS: [[DONE]]
 
-    // BLOCKS: call i32 @__kmpc_cancel_barrier(
+    // BLOCKS: call {{.*}}i32 @__kmpc_cancel_barrier(
     g = 1;
     // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}*
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
-    // BLOCKS: call void {{%.+}}(i8
+    // BLOCKS: call {{.*}}void {{%.+}}(i8
     ^{
       // BLOCKS: define {{.+}} void {{@.+}}(i8*
       g = 2;
       // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
-      // BLOCKS: call i8* @__kmpc_threadprivate_cached({{.+}} [[G]]
+      // BLOCKS: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[G]]
       // BLOCKS: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}*
       // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
       // BLOCKS: ret
@@ -148,19 +148,19 @@ int main() {
 // CHECK-LABEL: @main
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN:@.+]]([[S_FLOAT_TY]]* [[TEST]], [[S_FLOAT_TY]]*
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
+// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
+// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
 // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
 // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
+// CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
 
 // threadprivate_t_var = t_var;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]]
 // CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
 // CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}}
 // CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
@@ -169,11 +169,11 @@ int main() {
 // CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
 
 // threadprivate_vec = vec;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]]
 // CHECK: call void @llvm.memcpy{{.*}}(i8* %{{.+}}, i8* bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
 
 // threadprivate_s_arr = s_arr;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]]
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
 // CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
@@ -183,20 +183,20 @@ int main() {
 // CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
 
 // threadprivate_var = var;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]]
 // CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{%.+}}, [[S_FLOAT_TY]]* {{.*}}[[VAR]])
 // CHECK: [[DONE]]
 
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.*}}i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 
-// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
+// CHECK: define internal {{.*}}void [[MAIN_MICROTASK1]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
 
 // threadprivate_t_var = t_var;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]]
 // CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
 // CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}}
 // CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
@@ -205,24 +205,24 @@ int main() {
 // CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
 // CHECK: [[DONE]]
 
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.*}}i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 
 // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]*
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
+// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
+// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, {{%.+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*), i8* %{{.+}})
 // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
+// CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
 
 // threadprivate_t_var = t_var;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]]
 // CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
 // CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}}
 // CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
@@ -231,11 +231,11 @@ int main() {
 // CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
 
 // threadprivate_vec = vec;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]]
-// CHECK: call void @llvm.memcpy{{.*}}(i8* %{{.+}}, i8* bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*),
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]]
+// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* %{{.+}}, i8* bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*),
 
 // threadprivate_s_arr = s_arr;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]]
 // CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
 // CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
@@ -245,20 +245,20 @@ int main() {
 // CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
 
 // threadprivate_var = var;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]]
 // CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{%.+}}, [[S_INT_TY]]* {{.*}}[[TMAIN_VAR]])
 // CHECK: [[DONE]]
 
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.*}}i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 
-// CHECK: define internal void [[TMAIN_MICROTASK1]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
+// CHECK: define internal {{.*}}void [[TMAIN_MICROTASK1]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, {{%.+}}* %{{.+}})
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
 
 // threadprivate_t_var = t_var;
-// CHECK: call i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]]
+// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]]
 // CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
 // CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}}
 // CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
@@ -267,7 +267,7 @@ int main() {
 // CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
 // CHECK: [[DONE]]
 
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
+// CHECK: call {{.*}}i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 
 #endif
diff --git a/test/OpenMP/parallel_firstprivate_codegen.cpp b/test/OpenMP/parallel_firstprivate_codegen.cpp
index 3f61362a..760b961 100644
--- a/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ b/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -56,13 +56,13 @@ int main() {
 #ifdef LAMBDA
   // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
   // LAMBDA-LABEL: @main
-  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
   // LAMBDA: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // LAMBDA: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
   // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
-  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+  // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
 #pragma omp parallel firstprivate(g)
   {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
@@ -73,12 +73,12 @@ int main() {
     // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR]]
     // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G_REF]]
     // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
-    // LAMBDA: call i32 @__kmpc_cancel_barrier(
+    // LAMBDA: call {{.*}}i32 @__kmpc_cancel_barrier(
     g = 1;
     // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
     // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
     // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
-    // LAMBDA: call{{( x86_thiscallcc)?}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+    // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
     [&]() {
       // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
       // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
@@ -94,13 +94,13 @@ int main() {
 #elif defined(BLOCKS)
   // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
   // BLOCKS-LABEL: @main
-  // BLOCKS: call void {{%.+}}(i8
+  // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
   // BLOCKS: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // BLOCKS: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
   // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
-  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+  // BLOCKS: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
 #pragma omp parallel firstprivate(g)
   {
     // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
@@ -111,13 +111,13 @@ int main() {
     // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_REF_ADDR]]
     // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G_REF]]
     // BLOCKS: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
-    // BLOCKS: call i32 @__kmpc_cancel_barrier(
+    // BLOCKS: call {{.*}}i32 @__kmpc_cancel_barrier(
     g = 1;
     // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
     // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
-    // BLOCKS: call void {{%.+}}(i8
+    // BLOCKS: call {{.*}}void {{%.+}}(i8
     ^{
       // BLOCKS: define {{.+}} void {{@.+}}(i8*
       g = 2;
@@ -150,12 +150,12 @@ int main() {
 // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
 // CHECK: %{{.+}} = bitcast [[CAP_MAIN_TY]]*
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_MAIN_TY]]*)* [[MAIN_MICROTASK:@.+]] to void
+// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_MAIN_TY]]*)* [[MAIN_MICROTASK:@.+]] to void
 // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
 // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_MAIN_TY]]* %{{.+}})
+// CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_MAIN_TY]]* %{{.+}})
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
@@ -189,7 +189,7 @@ int main() {
 // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
 // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
 // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call {{.*}}i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
 // CHECK: ret void
@@ -197,11 +197,11 @@ int main() {
 // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
 // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
-// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_TMAIN_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void
+// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_TMAIN_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void
 // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
 // CHECK: ret
 //
-// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_TMAIN_TY]]* %{{.+}})
+// CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_TMAIN_TY]]* %{{.+}})
 // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
@@ -235,7 +235,7 @@ int main() {
 // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
 // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
 // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call {{.*}}i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
 // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
 // CHECK: ret void
diff --git a/test/OpenMP/parallel_for_codegen.cpp b/test/OpenMP/parallel_for_codegen.cpp
index 6262e76..2a387d9 100644
--- a/test/OpenMP/parallel_for_codegen.cpp
+++ b/test/OpenMP/parallel_for_codegen.cpp
@@ -27,7 +27,7 @@ void with_var_schedule() {
 // CHECK: [[CHUNK_SIZE:%.+]] = sext i8 [[CHUNK_VAL]] to i64
 // CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: __kmpc_cancel_barrier
+// CHECK: __kmpc_barrier
 #pragma omp parallel for schedule(static, char(a))
   for (unsigned long long i = 1; i < 2; ++i) {
   }
@@ -73,7 +73,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -117,7 +117,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -180,7 +180,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
 
 // CHECK: [[O_LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -225,7 +225,7 @@ void dynamic1(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -270,7 +270,7 @@ void guided7(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -322,7 +322,7 @@ void test_auto(float *a, float *b, float *c, float *d) {
 // CHECK: [[O_LOOP1_END]]
 // CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -369,7 +369,7 @@ void runtime(float *a, float *b, float *c, float *d) {
 // CHECK: [[O_LOOP1_END]]
 // CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -386,14 +386,14 @@ void parallel_for(float *a) {
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
   // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call void @__kmpc_for_static_fini({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
-  // TERM_DEBUG:     call {{.+}} @__kmpc_cancel_barrier({{.+}}), !dbg [[DBG_LOC_CANCEL:![0-9]+]]
+  // TERM_DEBUG:     call {{.+}} @__kmpc_barrier({{.+}}), !dbg [[DBG_LOC_CANCEL:![0-9]+]]
   // TERM_DEBUG:     [[TERM_LPAD]]
   // TERM_DEBUG:     call void @__clang_call_terminate
   // TERM_DEBUG:     unreachable
   // CLEANUP-NOT: __kmpc_global_thread_num
   // CLEANUP:     call void @__kmpc_for_static_init_4u({{.+}})
   // CLEANUP:     call void @__kmpc_for_static_fini({{.+}})
-  // CLEANUP:     call {{.+}} @__kmpc_cancel_barrier({{.+}})
+  // CLEANUP:     call {{.+}} @__kmpc_barrier({{.+}})
   for (unsigned i = 131071; i <= 2147483647; i += 127)
     a[i] += foo();
 }
diff --git a/test/OpenMP/parallel_for_simd_codegen.cpp b/test/OpenMP/parallel_for_simd_codegen.cpp
index 3490b8f..adde424 100644
--- a/test/OpenMP/parallel_for_simd_codegen.cpp
+++ b/test/OpenMP/parallel_for_simd_codegen.cpp
@@ -63,7 +63,7 @@ void simple(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[SIMPLE_LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   long long k = get_val();
 
@@ -112,7 +112,7 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
 // CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
 // CHECK-NEXT: store i64 [[LIN_ADD2]], i64* %{{.+}}
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   int lin = 12;
   #pragma omp parallel for simd linear(lin : get_val()), linear(g_ptr)
@@ -186,7 +186,7 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[GLIN_VAR:%.+]] = load double**, double*** %
 // CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]
 // CHECK: store double* {{.*}}[[GLIN_VAR]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   #pragma omp parallel for simd
 // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i32* [[LB:%[^,]+]], i32* [[UB:%[^,]+]], i32* [[STRIDE:%[^,]+]], i32 1, i32 1)
@@ -223,7 +223,7 @@ void simple(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[SIMPLE_LOOP4_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
   #pragma omp parallel for simd
 // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i32* [[LB:%[^,]+]], i32* [[UB:%[^,]+]], i32* [[STRIDE:%[^,]+]], i32 1, i32 1)
@@ -260,7 +260,7 @@ void simple(float *a, float *b, float *c, float *d) {
   }
 // CHECK: [[SIMPLE_LOOP5_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 
 // CHECK-NOT: mul i32 %{{.+}}, 10
   #pragma omp parallel for simd
@@ -315,7 +315,7 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]],
 // CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* %{{.+}},
 // CHECK-NEXT: br label
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
   }
   int R;
   {
@@ -364,7 +364,7 @@ void simple(float *a, float *b, float *c, float *d) {
 // CHECK: [[RED:%.+]] = mul nsw i32 %{{.+}}, [[R_PRIV_VAL]]
 // CHECK-NEXT: store i32 [[RED]], i32* %{{.+}},
 // CHECK-NEXT: call void @__kmpc_end_reduce_nowait(
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
   }
 }
 
@@ -473,7 +473,7 @@ void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) {
   }
 // CHECK: [[IT_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 // CHECK: ret void
 }
 
@@ -552,7 +552,7 @@ void collapsed(float *a, float *b, float *c, float *d) {
 // CHECK: store i32 3, i32* [[I:%[^,]+]]
 // CHECK: store i32 5, i32* [[I:%[^,]+]]
 // CHECK: store i16 9, i16* [[I:%[^,]+]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 // CHECK: ret void
 }
 
@@ -672,7 +672,7 @@ void widened(float *a, float *b, float *c, float *d) {
 // CHECK-NEXT: br label {{%.+}}
 // CHECK: [[T1_END]]
 // CHECK: call void @__kmpc_for_static_fini(%ident_t* {{.+}}, i32 %{{.+}})
-// CHECK: call i32 @__kmpc_cancel_barrier(%ident_t* {{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%ident_t* {{.+}}, i32 %{{.+}})
 // CHECK: ret void
 //
 // TERM_DEBUG-LABEL: bar
diff --git a/test/OpenMP/parallel_if_codegen.cpp b/test/OpenMP/parallel_if_codegen.cpp
index 3461743..ace20ec 100644
--- a/test/OpenMP/parallel_if_codegen.cpp
+++ b/test/OpenMP/parallel_if_codegen.cpp
@@ -14,27 +14,27 @@ void fn6();
 
 int Arg;
 
-// CHECK-LABEL: define void @{{.+}}gtid_test
+// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
-// CHECK:  call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, {{.+}}* [[GTID_TEST_REGION1:@.+]] to void
+// CHECK:  call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, {{.+}}* [[GTID_TEST_REGION1:@.+]] to void
 #pragma omp parallel
 #pragma omp parallel if (false)
   gtid_test();
 // CHECK: ret void
 }
 
-// CHECK: define internal void [[GTID_TEST_REGION1]](i{{.+}}* [[GTID_PARAM:%.+]], i
+// CHECK: define internal {{.*}}void [[GTID_TEST_REGION1]](i{{.+}}* [[GTID_PARAM:%.+]], i
 // CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
 // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]]
-// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
 // CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
 // CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[GTID_ADDR]]
-// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
 // CHECK: ret void
 
-// CHECK: define internal void [[GTID_TEST_REGION2]](
-// CHECK: call void @{{.+}}gtid_test
+// CHECK: define internal {{.*}}void [[GTID_TEST_REGION2]](
+// CHECK: call {{.*}}void @{{.+}}gtid_test
 // CHECK: ret void
 
 template <typename T>
@@ -50,26 +50,26 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
-// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN4:@.+]] to void
+// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN4:@.+]] to void
 #pragma omp parallel if (true)
   fn4();
-// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
 // CHECK: call void [[CAP_FN5:@.+]](i32* [[GTID_ADDR]],
-// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 #pragma omp parallel if (false)
   fn5();
 
 // CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
 // CHECK: [[OMP_THEN]]
-// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN6:@.+]] to void
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN6:@.+]] to void
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
-// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
 // CHECK: call void [[CAP_FN6]](i32* [[GTID_ADDR]],
-// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]
 #pragma omp parallel if (Arg)
@@ -78,47 +78,47 @@ int main() {
   return tmain(Arg);
 }
 
-// CHECK: define internal void [[CAP_FN4]]
-// CHECK: call void @{{.+}}fn4
+// CHECK: define internal {{.*}}void [[CAP_FN4]]
+// CHECK: call {{.*}}void @{{.+}}fn4
 // CHECK: ret void
 
-// CHECK: define internal void [[CAP_FN5]]
-// CHECK: call void @{{.+}}fn5
+// CHECK: define internal {{.*}}void [[CAP_FN5]]
+// CHECK: call {{.*}}void @{{.+}}fn5
 // CHECK: ret void
 
-// CHECK: define internal void [[CAP_FN6]]
-// CHECK: call void @{{.+}}fn6
+// CHECK: define internal {{.*}}void [[CAP_FN6]]
+// CHECK: call {{.*}}void @{{.+}}fn6
 // CHECK: ret void
 
 // CHECK-LABEL: define {{.+}} @{{.+}}tmain
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
-// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN1:@.+]] to void
-// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN1:@.+]] to void
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
 // CHECK: call void [[CAP_FN2:@.+]](i32* [[GTID_ADDR]],
-// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
 // CHECK: [[OMP_THEN]]
-// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN3:@.+]] to void
+// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 1, void {{.+}}* [[CAP_FN3:@.+]] to void
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
-// CHECK: call void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR:%.+]],
 // CHECK: call void [[CAP_FN3]](i32* [[GTID_ADDR]],
-// CHECK: call void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]
 
-// CHECK: define internal void [[CAP_FN1]]
-// CHECK: call void @{{.+}}fn1
+// CHECK: define internal {{.*}}void [[CAP_FN1]]
+// CHECK: call {{.*}}void @{{.+}}fn1
 // CHECK: ret void
 
-// CHECK: define internal void [[CAP_FN2]]
-// CHECK: call void @{{.+}}fn2
+// CHECK: define internal {{.*}}void [[CAP_FN2]]
+// CHECK: call {{.*}}void @{{.+}}fn2
 // CHECK: ret void
 
-// CHECK: define internal void [[CAP_FN3]]
-// CHECK: call void @{{.+}}fn3
+// CHECK: define internal {{.*}}void [[CAP_FN3]]
+// CHECK: call {{.*}}void @{{.+}}fn3
 // CHECK: ret void
 
 #endif
diff --git a/test/OpenMP/parallel_num_threads_codegen.cpp b/test/OpenMP/parallel_num_threads_codegen.cpp
index 2342c47..d744e5e 100644
--- a/test/OpenMP/parallel_num_threads_codegen.cpp
+++ b/test/OpenMP/parallel_num_threads_codegen.cpp
@@ -43,16 +43,16 @@ int main() {
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 // CHECK-DAG:   [[S_ADDR:%.+]] = alloca [[S_TY]]
 // CHECK-DAG:   [[A_ADDR:%.+]] = alloca i8
-// CHECK-DAG:   [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK-DAG:   [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
 // CHECK-DAG:   call {{.*}} [[S_TY_CONSTR:@.+]]([[S_TY]]* [[S_ADDR]], [[INTPTR_T_TY]] [[INTPTR_T_TY_ATTR:(signext )?]]0)
 // CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* [[S_ADDR]])
 // CHECK:       store i8 [[S_CHAR_OP]], i8* [[A_ADDR]]
-// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
-// CHECK:       call void {{.*}} @__kmpc_fork_call(
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
 // CHECK:       [[A_VAL:%.+]] = load i8, i8* [[A_ADDR]]
 // CHECK:       [[RES:%.+]] = sext i8 [[A_VAL]] to i32
-// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
-// CHECK:       call void {{.*}} @__kmpc_fork_call(
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
 // CHECK:       invoke{{.*}} [[INT_TY:i[0-9]+]] [[TMAIN_CHAR_5:@.+]]()
 // CHECK:       invoke{{.*}} [[INT_TY]] [[TMAIN_S_1:@.+]]()
 // CHECK:       call {{.*}} [[S_TY_DESTR:@.+]]([[S_TY]]* [[S_ADDR]])
@@ -60,24 +60,24 @@ int main() {
 // CHECK:       }
 
 // CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_CHAR_5]]()
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
-// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 5)
-// CHECK:       call void {{.*}} @__kmpc_fork_call(
-// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 23)
-// CHECK:       call void {{.*}} @__kmpc_fork_call(
+// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 5)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 23)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
 // CHECK:       ret [[INT_TY]] 0
 // CHECK-NEXT:  }
 
 // CHECK:       define{{.*}} [[INT_TY]] [[TMAIN_S_1]]()
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
-// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 1)
-// CHECK:       call void {{.*}} @__kmpc_fork_call(
+// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 1)
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
 // CHECK:       call {{.*}} [[S_TY_CONSTR]]([[S_TY]]* [[S_TEMP:%.+]], [[INTPTR_T_TY]] [[INTPTR_T_TY_ATTR]]23)
 // CHECK:       [[S_CHAR_OP:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]]([[S_TY]]* [[S_TEMP]])
 // CHECK:       [[RES:%.+]] = sext {{.*}}i8 [[S_CHAR_OP]] to i32
-// CHECK:       call void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
+// CHECK:       call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
 // CHECK:       call {{.*}} [[S_TY_DESTR]]([[S_TY]]* [[S_TEMP]])
-// CHECK:       call void {{.*}} @__kmpc_fork_call(
+// CHECK:       call {{.*}}void {{.*}} @__kmpc_fork_call(
 // CHECK:       ret [[INT_TY]] 0
 // CHECK:       }
 
diff --git a/test/OpenMP/parallel_private_codegen.cpp b/test/OpenMP/parallel_private_codegen.cpp
index 99e2d4d..55f25c6 100644
--- a/test/OpenMP/parallel_private_codegen.cpp
+++ b/test/OpenMP/parallel_private_codegen.cpp
@@ -19,9 +19,9 @@ struct S {
 volatile int g = 1212;
 
 // CHECK: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK: [[CAP_MAIN_TY:%.+]] = type { [2 x i{{[0-9]+}}]*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]* }
+// CHECK: [[CAP_MAIN_TY:%.+]] = type { i8 }
 // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-// CHECK: [[CAP_TMAIN_TY:%.+]] = type { [2 x i{{[0-9]+}}]*, i{{[0-9]+}}*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i8 }
 template <typename T>
 T tmain() {
   S<T> test;
@@ -41,13 +41,12 @@ int main() {
 #ifdef LAMBDA
   // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
   // LAMBDA-LABEL: @main
-  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-  // LAMBDA: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // LAMBDA: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
-  // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
-  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+  // LAMBDA-NOT: = getelementptr inbounds %{{.+}},
+  // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* %{{.+}} to i8*
+  // LAMBDA: call{{.*}} void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
 #pragma omp parallel private(g)
   {
     // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
@@ -57,7 +56,7 @@ int main() {
     // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
     // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
     // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
-    // LAMBDA: call{{( x86_thiscallcc)?}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+    // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
     [&]() {
       // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
       // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
@@ -73,13 +72,12 @@ int main() {
 #elif defined(BLOCKS)
   // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
   // BLOCKS-LABEL: @main
-  // BLOCKS: call void {{%.+}}(i8
+  // BLOCKS: call{{.*}} void {{%.+}}(i8
   ^{
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
-  // BLOCKS: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // BLOCKS: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
-  // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
-  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+  // BLOCKS-NOT: = getelementptr inbounds %{{.+}},
+  // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* %{{.+}} to i8*
+  // BLOCKS: call{{.*}} void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
 #pragma omp parallel private(g)
   {
     // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
@@ -90,7 +88,7 @@ int main() {
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
     // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
-    // BLOCKS: call void {{%.+}}(i8
+    // BLOCKS: call{{.*}} void {{%.+}}(i8
     ^{
       // BLOCKS: define {{.+}} void {{@.+}}(i8*
       g = 2;
diff --git a/test/OpenMP/parallel_proc_bind_codegen.cpp b/test/OpenMP/parallel_proc_bind_codegen.cpp
index 2a8eaee..34a64de 100644
--- a/test/OpenMP/parallel_proc_bind_codegen.cpp
+++ b/test/OpenMP/parallel_proc_bind_codegen.cpp
@@ -36,16 +36,16 @@ int main() {
 }
 
 // CHECK-LABEL: @main
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
-// CHECK:       call void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 4)
-// CHECK:       call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
-// CHECK:       call void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 3)
-// CHECK:       call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 4)
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK:       call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 3)
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 
 // CHECK-LABEL: @{{.+}}tmain
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
-// CHECK:       call void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
-// CHECK:       call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEF_LOC_2]])
+// CHECK:       call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID]], i32 2)
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
 // CHECK:       ret i32 0
 // CHECK-NEXT:  }
 
diff --git a/test/OpenMP/parallel_sections_codegen.cpp b/test/OpenMP/parallel_sections_codegen.cpp
index c16cc8d..ba1bf6c 100644
--- a/test/OpenMP/parallel_sections_codegen.cpp
+++ b/test/OpenMP/parallel_sections_codegen.cpp
@@ -73,7 +73,7 @@ int main() {
 // CHECK:      [[INNER_LOOP_END]]
   }
 // CHECK:      call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK:      call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]],
+// CHECK:      call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]],
   return tmain<int>();
 }
 
@@ -89,7 +89,7 @@ int main() {
 // CHECK:       call void @__kmpc_end_single(
 // CHECK-NEXT:  br label %[[END]]
 // CHECK:       [[END]]
-// CHECK-NEXT:  call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]],
+// CHECK-NEXT:  call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]],
 // CHECK-NEXT:  ret
 // CHECK:       [[TERM_LPAD]]
 // CHECK:       call void @__clang_call_terminate(i8*
diff --git a/test/OpenMP/sections_codegen.cpp b/test/OpenMP/sections_codegen.cpp
index 11cc900..d25230a 100644
--- a/test/OpenMP/sections_codegen.cpp
+++ b/test/OpenMP/sections_codegen.cpp
@@ -72,7 +72,7 @@ int main() {
 // CHECK:      [[INNER_LOOP_END]]
   }
 // CHECK:      call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK:      call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SECTIONS_LOC]],
+// CHECK:      call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SECTIONS_LOC]],
 #pragma omp sections nowait
   {
     foo();
@@ -96,8 +96,8 @@ int main() {
 // CHECK-NEXT:  br label %[[END]]
 // CHECK:       [[END]]
 // CHECK-NEXT:  call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SINGLE_LOC]],
-// CHECK-NEXT:  call i32 @__kmpc_cancel_barrier(
-// CHECK-NEXT:  ret
+// CHECK:  call i32 @__kmpc_cancel_barrier(
+// CHECK:  ret
 // CHECK:       [[TERM_LPAD]]
 // CHECK:       call void @__clang_call_terminate(i8*
 // CHECK-NEXT:  unreachable
diff --git a/test/OpenMP/sections_firstprivate_codegen.cpp b/test/OpenMP/sections_firstprivate_codegen.cpp
index ba49864..4ec16ba 100644
--- a/test/OpenMP/sections_firstprivate_codegen.cpp
+++ b/test/OpenMP/sections_firstprivate_codegen.cpp
@@ -84,7 +84,7 @@ int main() {
     // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G]]
     // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
-    // LAMBDA: call i32 @__kmpc_cancel_barrier(
+    // LAMBDA: call void @__kmpc_barrier(
     g = 1;
     // LAMBDA: call void @__kmpc_for_static_init_4(
     // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@@ -126,7 +126,7 @@ int main() {
     // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
     // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G]]
     // BLOCKS: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
-    // BLOCKS: call i32 @__kmpc_cancel_barrier(
+    // BLOCKS: call void @__kmpc_barrier(
     g = 1;
     // BLOCKS: call void @__kmpc_for_static_init_4(
     // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
@@ -199,7 +199,7 @@ int main() {
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
 // CHECK: call void @__kmpc_end_single(
 
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 
 // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
 
@@ -261,7 +261,7 @@ int main() {
 // Synchronization for initialization.
 // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
 // CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call void @__kmpc_for_static_fini(
diff --git a/test/OpenMP/sections_private_codegen.cpp b/test/OpenMP/sections_private_codegen.cpp
index 4bad714..90331cb 100644
--- a/test/OpenMP/sections_private_codegen.cpp
+++ b/test/OpenMP/sections_private_codegen.cpp
@@ -19,9 +19,9 @@ struct S {
 volatile double g;
 
 // CHECK: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK: [[CAP_MAIN_TY:%.+]] = type { i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]* }
+// CHECK: [[CAP_MAIN_TY:%.+]] = type { i8 }
 // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i8 }
 template <typename T>
 T tmain() {
   S<T> test;
@@ -43,10 +43,10 @@ int main() {
 #ifdef LAMBDA
   // LAMBDA: [[G:@.+]] = global double
   // LAMBDA-LABEL: @main
-  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
+  // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
 #pragma omp parallel
 #pragma omp sections private(g)
   {
@@ -54,12 +54,12 @@ int main() {
     // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
     // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
     g = 1;
-    // LAMBDA: call void @__kmpc_for_static_init_4(
+    // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
     // LAMBDA: store volatile double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
     // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
     // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]]
-    // LAMBDA: call{{( x86_thiscallcc)?}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
-    // LAMBDA: call void @__kmpc_for_static_fini(
+    // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+    // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
 #pragma omp section
     [&]() {
       // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
@@ -76,10 +76,10 @@ int main() {
 #elif defined(BLOCKS)
   // BLOCKS: [[G:@.+]] = global double
   // BLOCKS-LABEL: @main
-  // BLOCKS: call void {{%.+}}(i8
+  // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
-  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* {{.+}})
+  // BLOCKS: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* {{.+}})
 #pragma omp parallel
 #pragma omp sections private(g)
     {
@@ -87,13 +87,13 @@ int main() {
     // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca double,
     // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
     g = 1;
-    // BLOCKS: call void @__kmpc_for_static_init_4(
+    // BLOCKS: call {{.*}}void @__kmpc_for_static_init_4(
     // BLOCKS: store volatile double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
     // BLOCKS: double* [[G_PRIVATE_ADDR]]
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
-    // BLOCKS: call void {{%.+}}(i8
-    // BLOCKS: call void @__kmpc_for_static_fini(
+    // BLOCKS: call {{.*}}void {{%.+}}(i8
+    // BLOCKS: call {{.*}}void @__kmpc_for_static_fini(
 #pragma omp section
     ^{
       // BLOCKS: define {{.+}} void {{@.+}}(i8*
diff --git a/test/OpenMP/simd_metadata.c b/test/OpenMP/simd_metadata.c
index 2a95fef1..e7e35dd 100644
--- a/test/OpenMP/simd_metadata.c
+++ b/test/OpenMP/simd_metadata.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
+// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX
 
@@ -16,6 +17,7 @@ void h1(float *c, float *a, double b[], int size)
 
 // X86-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
 // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
+// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
 // PPC-NEXT:     [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
 // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
 
@@ -25,6 +27,7 @@ void h1(float *c, float *a, double b[], int size)
 
 // X86-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
 // X86-AVX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
+// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
 // PPC-NEXT:      [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
 // PPC-QPX-NEXT:  [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
 
diff --git a/test/OpenMP/single_codegen.cpp b/test/OpenMP/single_codegen.cpp
index 0593b2a..d81739e 100644
--- a/test/OpenMP/single_codegen.cpp
+++ b/test/OpenMP/single_codegen.cpp
@@ -63,7 +63,7 @@ int main() {
 // CHECK-NEXT:  call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT:  br label {{%?}}[[EXIT]]
 // CHECK:       [[EXIT]]
-// CHECK:       call{{.*}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]])
+// CHECK:       call{{.*}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]])
 #pragma omp single
   a = 2;
 // CHECK:       store i32 0, i32* [[DID_IT]]
diff --git a/test/OpenMP/single_firstprivate_codegen.cpp b/test/OpenMP/single_firstprivate_codegen.cpp
index e30c00d..059108b 100644
--- a/test/OpenMP/single_firstprivate_codegen.cpp
+++ b/test/OpenMP/single_firstprivate_codegen.cpp
@@ -182,7 +182,7 @@ int main() {
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
 // CHECK: call void @__kmpc_end_single(
 
-// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
+// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 
 // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
 
diff --git a/test/OpenMP/single_private_codegen.cpp b/test/OpenMP/single_private_codegen.cpp
index 4f78d08..a7fb2ed 100644
--- a/test/OpenMP/single_private_codegen.cpp
+++ b/test/OpenMP/single_private_codegen.cpp
@@ -19,9 +19,9 @@ struct S {
 volatile double g;
 
 // CHECK: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK: [[CAP_MAIN_TY:%.+]] = type { i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]* }
+// CHECK: [[CAP_MAIN_TY:%.+]] = type { i8 }
 // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i8 }
 template <typename T>
 T tmain() {
   S<T> test;
@@ -42,10 +42,10 @@ int main() {
 #ifdef LAMBDA
   // LAMBDA: [[G:@.+]] = global double
   // LAMBDA-LABEL: @main
-  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
+  // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}})
 #pragma omp parallel
 #pragma omp single private(g)
   {
@@ -53,12 +53,12 @@ int main() {
     // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
     // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
     g = 1;
-    // LAMBDA: call i32 @__kmpc_single(
+    // LAMBDA: call {{.*}}i32 @__kmpc_single(
     // LAMBDA: store volatile double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
     // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
     // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]]
-    // LAMBDA: call{{( x86_thiscallcc)?}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
-    // LAMBDA: call void @__kmpc_end_single(
+    // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+    // LAMBDA: call {{.*}}void @__kmpc_end_single(
     [&]() {
       // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
       // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
@@ -74,10 +74,10 @@ int main() {
 #elif defined(BLOCKS)
   // BLOCKS: [[G:@.+]] = global double
   // BLOCKS-LABEL: @main
-  // BLOCKS: call void {{%.+}}(i8
+  // BLOCKS: call {{.*}}void {{%.+}}(i8
   ^{
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
-  // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* {{.+}})
+  // BLOCKS: call {{.*}}void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* {{.+}})
 #pragma omp parallel
 #pragma omp single private(g)
   {
@@ -85,13 +85,13 @@ int main() {
     // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca double,
     // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
     g = 1;
-    // BLOCKS: call i32 @__kmpc_single(
+    // BLOCKS: call {{.*}}i32 @__kmpc_single(
     // BLOCKS: store volatile double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
     // BLOCKS: double* [[G_PRIVATE_ADDR]]
     // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
-    // BLOCKS: call void {{%.+}}(i8
-    // BLOCKS: call void @__kmpc_end_single(
+    // BLOCKS: call {{.*}}void {{%.+}}(i8
+    // BLOCKS: call {{.*}}void @__kmpc_end_single(
     ^{
       // BLOCKS: define {{.+}} void {{@.+}}(i8*
       g = 2;
diff --git a/test/OpenMP/task_ast_print.cpp b/test/OpenMP/task_ast_print.cpp
index 55407c1..5fd4103 100644
--- a/test/OpenMP/task_ast_print.cpp
+++ b/test/OpenMP/task_ast_print.cpp
@@ -33,7 +33,7 @@ T tmain(T argc, T *argv) {
   T b = argc, c, d, e, f, g;
   static T a;
   S<T> s;
-#pragma omp task untied
+#pragma omp task untied depend(in : argc)
   a = 2;
 #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S<T>::TS > 0)
   foo();
@@ -46,7 +46,7 @@ T tmain(T argc, T *argv) {
 // CHECK-NEXT: int b = argc, c, d, e, f, g;
 // CHECK-NEXT: static int a;
 // CHECK-NEXT: S<int> s;
-// CHECK-NEXT: #pragma omp task untied
+// CHECK-NEXT: #pragma omp task untied depend(in : argc)
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<int>::TS > 0)
 // CHECK-NEXT: foo()
@@ -56,7 +56,7 @@ T tmain(T argc, T *argv) {
 // CHECK-NEXT: long b = argc, c, d, e, f, g;
 // CHECK-NEXT: static long a;
 // CHECK-NEXT: S<long> s;
-// CHECK-NEXT: #pragma omp task untied
+// CHECK-NEXT: #pragma omp task untied depend(in : argc)
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<long>::TS > 0)
 // CHECK-NEXT: foo()
@@ -66,7 +66,7 @@ T tmain(T argc, T *argv) {
 // CHECK-NEXT: T b = argc, c, d, e, f, g;
 // CHECK-NEXT: static T a;
 // CHECK-NEXT: S<T> s;
-// CHECK-NEXT: #pragma omp task untied
+// CHECK-NEXT: #pragma omp task untied depend(in : argc)
 // CHECK-NEXT: a = 2;
 // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<T>::TS > 0)
 // CHECK-NEXT: foo()
@@ -82,12 +82,12 @@ int main(int argc, char **argv) {
 #pragma omp threadprivate(a)
   Enum ee;
 // CHECK: Enum ee;
-#pragma omp task untied mergeable
-  // CHECK-NEXT: #pragma omp task untied mergeable
+#pragma omp task untied mergeable depend(out:argv[1])
+  // CHECK-NEXT: #pragma omp task untied mergeable depend(out : argv[1])
   a = 2;
 // CHECK-NEXT: a = 2;
-#pragma omp task default(none), private(argc, b) firstprivate(argv) if (argc > 0) final(a > 0)
-  // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) if(argc > 0) final(a > 0)
+#pragma omp task default(none), private(argc, b) firstprivate(argv) if (argc > 0) final(a > 0) depend(inout : a)
+  // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) if(argc > 0) final(a > 0) depend(inout : a)
   foo();
   // CHECK-NEXT: foo();
   return tmain<int, 5>(b, &b) + tmain<long, 1>(x, &x);
diff --git a/test/OpenMP/task_codegen.cpp b/test/OpenMP/task_codegen.cpp
index 1c28fbc..cce0a13 100644
--- a/test/OpenMP/task_codegen.cpp
+++ b/test/OpenMP/task_codegen.cpp
@@ -7,8 +7,10 @@
 #define HEADER
 
 // CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [[STRUCT_S:%.+]]* }
+// CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* }
+// CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* }
 // CHECK-DAG: [[KMP_TASK_T:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
+// CHECK-DAG: [[KMP_DEPEND_INFO:%.+]] = type { i64, i64, i8 }
 struct S {
   int a;
   S() : a(0) {}
@@ -19,14 +21,14 @@ int a;
 // CHECK-LABEL : @main
 int main() {
 // CHECK: [[B:%.+]] = alloca i8
-// CHECK: [[S:%.+]] = alloca [[STRUCT_S]]
+// CHECK: [[S:%.+]] = alloca [2 x [[STRUCT_S]]]
   char b;
-  S s;
+  S s[2];
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* @{{.+}})
 // CHECK: [[B_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES:%.+]], i32 0, i32 0
 // CHECK: store i8* [[B]], i8** [[B_REF]]
 // CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS]], [[STRUCT_SHAREDS]]* [[CAPTURES]], i32 0, i32 1
-// CHECK: store [[STRUCT_S]]* [[S]], [[STRUCT_S]]** [[S_REF]]
+// CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]]
 // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 1, i64 32, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY1:@.+]] to i32 (i32, i8*)*))
 // CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
 // CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
@@ -39,7 +41,47 @@ int main() {
   {
     a = 15;
     b = a;
-    s.a = 10;
+    s[0].a = 10;
+  }
+// CHECK: [[S_REF:%.+]] = getelementptr inbounds [[STRUCT_SHAREDS1]], [[STRUCT_SHAREDS1]]* [[CAPTURES:%.+]], i32 0, i32 0
+// CHECK: store [2 x [[STRUCT_S]]]* [[S]], [2 x [[STRUCT_S]]]** [[S_REF]]
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 8,
+// CHECK: [[SHAREDS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR:%.+]], i32 0, i32 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_PTR]]
+// CHECK: [[BITCAST:%.+]] = bitcast [[STRUCT_SHAREDS1]]* [[CAPTURES]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[BITCAST]], i64 8, i32 8, i1 false)
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_PTR]], i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: getelementptr inbounds [3 x [[KMP_DEPEND_INFO]]], [3 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: store i64 ptrtoint (i32* @{{.+}} to i64), i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: store i64 4, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
+// CHECK: store i8 1, i8*
+// CHECK: getelementptr inbounds [3 x [[KMP_DEPEND_INFO]]], [3 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: ptrtoint i8* [[B]] to i64
+// CHECK: store i64 %{{[^,]+}}, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: store i64 1, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
+// CHECK: store i8 1, i8*
+// CHECK: getelementptr inbounds [3 x [[KMP_DEPEND_INFO]]], [3 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 2
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: ptrtoint [2 x [[STRUCT_S]]]* [[S]] to i64
+// CHECK: store i64 %{{[^,]+}}, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: store i64 8, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
+// CHECK: store i8 1, i8*
+// CHECK: getelementptr inbounds [3 x [[KMP_DEPEND_INFO]]], [3 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 3, i8* %{{[^,]+}}, i32 0, i8* null)
+#pragma omp task shared(a, s) depend(in : a, b, s)
+  {
+    a = 15;
+    s[1].a = 10;
   }
 // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY2:@.+]] to i32 (i32, i8*)*))
 // CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
@@ -49,6 +91,51 @@ int main() {
   {
     a = 1;
   }
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1,
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i32 0, i64 0
+// CHECK: getelementptr inbounds [1 x [[KMP_DEPEND_INFO]]], [1 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64
+// CHECK: store i64 %{{[^,]+}}, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: store i64 4, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
+// CHECK: store i8 2, i8*
+// CHECK: getelementptr inbounds [1 x [[KMP_DEPEND_INFO]]], [1 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 1, i8* %{{[^,]+}}, i32 0, i8* null)
+#pragma omp task untied depend(out : s[0])
+  {
+    a = 1;
+  }
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1,
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: getelementptr inbounds [2 x [[KMP_DEPEND_INFO]]], [2 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: store i64 ptrtoint (i32* @{{.+}} to i64), i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: store i64 4, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
+// CHECK: store i8 3, i8*
+// CHECK: getelementptr inbounds [2 x [[STRUCT_S]]], [2 x [[STRUCT_S]]]* [[S]], i32 0, i64 1
+// CHECK: getelementptr inbounds [2 x [[KMP_DEPEND_INFO]]], [2 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: ptrtoint [[STRUCT_S]]* %{{.+}} to i64
+// CHECK: store i64 %{{[^,]+}}, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 1
+// CHECK: store i64 4, i64*
+// CHECK: getelementptr inbounds [[KMP_DEPEND_INFO]], [[KMP_DEPEND_INFO]]* %{{[^,]+}}, i32 0, i32 2
+// CHECK: store i8 3, i8*
+// CHECK: getelementptr inbounds [2 x [[KMP_DEPEND_INFO]]], [2 x [[KMP_DEPEND_INFO]]]* %{{[^,]+}}, i32 0, i32 0
+// CHECK: bitcast [[KMP_DEPEND_INFO]]* %{{.+}} to i8*
+// CHECK: call i32 @__kmpc_omp_task_with_deps([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 2, i8* %{{[^,]+}}, i32 0, i8* null)
+#pragma omp task final(true) depend(inout: a, s[1])
+  {
+    a = 2;
+  }
 // CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 3, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY3:@.+]] to i32 (i32, i8*)*))
 // CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
 // CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
diff --git a/test/OpenMP/task_depend_messages.cpp b/test/OpenMP/task_depend_messages.cpp
new file mode 100644
index 0000000..152350c
--- /dev/null
+++ b/test/OpenMP/task_depend_messages.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+class vector {
+  public:
+    int operator[](int index) { return 0; }
+};
+
+int main(int argc, char **argv) {
+  vector vec;
+  typedef float V __attribute__((vector_size(16)));
+  V a;
+
+  #pragma omp task depend // expected-error {{expected '(' after 'depend'}}
+  #pragma omp task depend ( // expected-error {{expected 'in', 'out' or 'inout' in OpenMP clause 'depend'}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-warning {{missing ':' after dependency type - ignoring}}
+  #pragma omp task depend () // expected-error {{expected 'in', 'out' or 'inout' in OpenMP clause 'depend'}} expected-warning {{missing ':' after dependency type - ignoring}}
+  #pragma omp task depend (argc // expected-error {{expected 'in', 'out' or 'inout' in OpenMP clause 'depend'}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  #pragma omp task depend (in : argc)) // expected-warning {{extra tokens at the end of '#pragma omp task' are ignored}}
+  #pragma omp task depend (out: ) // expected-error {{expected expression}}
+  #pragma omp task depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}}
+  #pragma omp task depend (out :S1) // expected-error {{'S1' does not refer to a value}}
+  #pragma omp task depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}}
+  #pragma omp task depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}}
+  #pragma omp task depend (in : argv[0])
+  #pragma omp task depend (in : ) // expected-error {{expected expression}}
+  #pragma omp task depend (in : main) // expected-error {{expected variable name, array element or array section}}
+  #pragma omp task depend(in : a[0]) // expected-error{{expected variable name, array element or array section}}
+  foo();
+
+  return 0;
+}
diff --git a/test/OpenMP/task_if_codegen.cpp b/test/OpenMP/task_if_codegen.cpp
index d4fd6bb..be9b47b 100644
--- a/test/OpenMP/task_if_codegen.cpp
+++ b/test/OpenMP/task_if_codegen.cpp
@@ -11,6 +11,10 @@ void fn3();
 void fn4();
 void fn5();
 void fn6();
+void fn7();
+void fn8();
+void fn9();
+void fn10();
 
 int Arg;
 
@@ -46,25 +50,31 @@ int tmain(T Arg) {
   fn2();
 #pragma omp task if (Arg)
   fn3();
+#pragma omp task if (Arg) depend(in : Arg)
+  fn4();
+#pragma omp task if (Arg) depend(out : Arg)
+  fn5();
+#pragma omp task if (Arg) depend(inout : Arg)
+  fn6();
   return 0;
 }
 
 // CHECK-LABEL: @main
 int main() {
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN4:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN7:[^ ]+]] to i32 (i32, i8*)*))
 // CHECK: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
 #pragma omp task if (true)
-  fn4();
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(
+  fn7();
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN8:[^ ]+]] to i32 (i32, i8*)*))
 // CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
 // CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
-// CHECK: call i32 [[CAP_FN5:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call i32 [[CAP_FN8]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
 // CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
 #pragma omp task if (false)
-  fn5();
+  fn8();
 
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN6:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN9:[^ ]+]] to i32 (i32, i8*)*))
 // CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
 // CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
 // CHECK: [[OMP_THEN]]
@@ -72,26 +82,45 @@ int main() {
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
 // CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
-// CHECK: call i32 [[CAP_FN6:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call i32 [[CAP_FN9]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
 // CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]
 #pragma omp task if (Arg)
-  fn6();
+  fn9();
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc({{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN10:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task_with_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 1, i8* [[LIST:%[^,]+]], i32 0, i8* null)
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_wait_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1, i8* [[LIST]], i32 0, i8* null)
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN10]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+#pragma omp task if (Arg) depend(inout : Arg)
+  fn10();
   // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
   return tmain(Arg);
 }
 
-// CHECK: define internal i32 [[CAP_FN4]]
-// CHECK: call void @{{.+}}fn4
+// CHECK: define internal i32 [[CAP_FN7]]
+// CHECK: call void @{{.+}}fn7
 // CHECK: ret i32
 
-// CHECK: define internal i32 [[CAP_FN5]]
-// CHECK: call void @{{.+}}fn5
+// CHECK: define internal i32 [[CAP_FN8]]
+// CHECK: call void @{{.+}}fn8
 // CHECK: ret i32
 
-// CHECK: define internal i32 [[CAP_FN6]]
-// CHECK: call void @{{.+}}fn6
+// CHECK: define internal i32 [[CAP_FN9]]
+// CHECK: call void @{{.+}}fn9
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN10]]
+// CHECK: call void @{{.+}}fn10
 // CHECK: ret i32
 
 // CHECK-LABEL: define {{.+}} @{{.+}}tmain
@@ -113,7 +142,49 @@ int main() {
 // CHECK: br label %[[OMP_END:.+]]
 // CHECK: [[OMP_ELSE]]
 // CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
-// CHECK: call i32 [[CAP_FN3:@.+]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call i32 [[CAP_FN3]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN4:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task_with_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 1, i8* [[LIST:%.+]], i32 0, i8* null)
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_wait_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1, i8* [[LIST]], i32 0, i8* null)
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN4]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN5:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task_with_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 1, i8* [[LIST:%.+]], i32 0, i8* null)
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_wait_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1, i8* [[LIST]], i32 0, i8* null)
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN5]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
+// CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: br label %[[OMP_END]]
+// CHECK: [[OMP_END]]
+
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^,]+}}, i32 [[GTID]], i32 1, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[CAP_FN6:[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_PTR:%.+]] = bitcast i8* [[ORIG_TASK_PTR]] to
+// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
+// CHECK: [[OMP_THEN]]
+// CHECK: call i32 @__kmpc_omp_task_with_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]], i32 1, i8* [[LIST:%.+]], i32 0, i8* null)
+// CHECK: br label %[[OMP_END:.+]]
+// CHECK: [[OMP_ELSE]]
+// CHECK: call void @__kmpc_omp_wait_deps(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 1, i8* [[LIST]], i32 0, i8* null)
+// CHECK: call void @__kmpc_omp_task_begin_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
+// CHECK: call i32 [[CAP_FN6]](i32 [[GTID]], %{{.+}}* [[TASK_PTR]])
 // CHECK: call void @__kmpc_omp_task_complete_if0(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]], i8* [[ORIG_TASK_PTR]])
 // CHECK: br label %[[OMP_END]]
 // CHECK: [[OMP_END]]
@@ -130,4 +201,16 @@ int main() {
 // CHECK: call void @{{.+}}fn3
 // CHECK: ret i32
 
+// CHECK: define internal i32 [[CAP_FN4]]
+// CHECK: call void @{{.+}}fn4
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN5]]
+// CHECK: call void @{{.+}}fn5
+// CHECK: ret i32
+
+// CHECK: define internal i32 [[CAP_FN6]]
+// CHECK: call void @{{.+}}fn6
+// CHECK: ret i32
+
 #endif
diff --git a/test/OpenMP/task_private_codegen.cpp b/test/OpenMP/task_private_codegen.cpp
index 3a9ed7c..463913f 100644
--- a/test/OpenMP/task_private_codegen.cpp
+++ b/test/OpenMP/task_private_codegen.cpp
@@ -26,11 +26,11 @@ volatile double g;
 
 // CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)* }
 // CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double }
-// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]* }
+// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { i8 }
 // CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [[S_DOUBLE_TY]], [2 x [[S_DOUBLE_TY]]], i32, [2 x i32]
 // CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] }
 // CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 }
-// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { i8 }
 // CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]] }
 // CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_TMAIN_TY]] }
 template <typename T>
@@ -55,7 +55,7 @@ int main() {
   // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-  // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+  // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
 // LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
 // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
 // LAMBDA: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
@@ -86,7 +86,7 @@ int main() {
   // BLOCKS: call void {{%.+}}(i8
   ^{
   // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
-  // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+  // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
   // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
   // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
   // BLOCKS: call i32 @__kmpc_omp_task(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]])
@@ -135,31 +135,18 @@ int main() {
 
 // CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]])
 
-// Store original variables in capture struct.
-// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]],
-// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]],
-// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
-// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]],
-// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
-// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]],
+// Do not store original variables in capture struct.
+// CHECK-NOT: getelementptr inbounds [[CAP_MAIN_TY]],
 
 // Allocate task.
 // Returns struct kmp_task_t {
 //         [[KMP_TASK_T_TY]] task_data;
 //         [[KMP_TASK_MAIN_TY]] privates;
 //       };
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 72, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
 // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]*
 
-// Fill kmp_task_t->shareds by copying from original capture argument.
 // CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
-// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
-
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // Also copy address of private copy to the corresponding shareds reference.
 // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
@@ -257,30 +244,18 @@ int main() {
 
 // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
 
-// Store original variables in capture struct.
-// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]],
-// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]],
-// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
-// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]],
-// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
-// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]],
+// Do not store original variables in capture struct.
+// CHECK-NOT: getelementptr inbounds [[CAP_TMAIN_TY]],
 
 // Allocate task.
 // Returns struct kmp_task_t {
 //         [[KMP_TASK_T_TY]] task_data;
 //         [[KMP_TASK_TMAIN_TY]] privates;
 //       };
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 56, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 56, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
 // CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]*
 
-// Fill kmp_task_t->shareds by copying from original capture argument.
 // CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
-// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false)
 
 // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
 // CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
diff --git a/test/OpenMP/taskgroup_codegen.cpp b/test/OpenMP/taskgroup_codegen.cpp
index a6aec1f..eb45f31 100644
--- a/test/OpenMP/taskgroup_codegen.cpp
+++ b/test/OpenMP/taskgroup_codegen.cpp
@@ -9,7 +9,7 @@
 
 // CHECK:       [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
 
-// CHECK:       define void [[FOO:@.+]]()
+// CHECK:       define {{.*}}void [[FOO:@.+]]()
 
 void foo() {}
 
@@ -19,19 +19,19 @@ int main() {
 // CHECK:       [[A_ADDR:%.+]] = alloca i8
   char a;
 
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// CHECK:       call void @__kmpc_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// CHECK:       call {{.*}}void @__kmpc_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT:  store i8 2, i8* [[A_ADDR]]
-// CHECK-NEXT:  call void @__kmpc_end_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 #pragma omp taskgroup
   a = 2;
-// CHECK:       call void @__kmpc_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  invoke void [[FOO]]()
-// CHECK:       call void @__kmpc_end_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK:       call {{.*}}void @__kmpc_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// CHECK-NEXT:  invoke {{.*}}void [[FOO]]()
+// CHECK:       call {{.*}}void @__kmpc_end_taskgroup([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 #pragma omp taskgroup
   foo();
-// CHECK-NOT:   call void @__kmpc_taskgroup
-// CHECK-NOT:   call void @__kmpc_end_taskgroup
+// CHECK-NOT:   call {{.*}}void @__kmpc_taskgroup
+// CHECK-NOT:   call {{.*}}void @__kmpc_end_taskgroup
   return a;
 }
 
diff --git a/test/PCH/cxx-mangling.cpp b/test/PCH/cxx-mangling.cpp
index d086f26..2802687 100644
--- a/test/PCH/cxx-mangling.cpp
+++ b/test/PCH/cxx-mangling.cpp
@@ -17,11 +17,11 @@ struct A {
 
 template<typename T> void f(T) {}
 
-// CHECK-LABEL: define void @_Z1g1A(
+// CHECK-LABEL: define {{.*}}void @_Z1g1A(
 void g(A a) {
-  // CHECK: call void @_Z1fIN1AUt0_EEvT_(
+  // CHECK: call {{.*}}void @_Z1fIN1AUt0_EEvT_(
   f(a.b);
-  // CHECK: call void @_Z1fIN1AUt_EEvT_(
+  // CHECK: call {{.*}}void @_Z1fIN1AUt_EEvT_(
   f(a.a);
 }
 
diff --git a/test/PCH/objc_container.m b/test/PCH/objc_container.m
index 44aac70..2af51cb 100644
--- a/test/PCH/objc_container.m
+++ b/test/PCH/objc_container.m
@@ -14,7 +14,7 @@
 // CHECK-PRINT: oldObject = dictionary[key];
 // CHECK-PRINT: dictionary[key] = newObject;
 
-// CHECK-IR: define void @all() #0
+// CHECK-IR: define {{.*}}void @all() #0
 // CHECK-IR: {{call.*objc_msgSend}}
 // CHECK-IR: {{call.*objc_msgSend}}
 // CHECK-IR: {{call.*objc_msgSend}}
diff --git a/test/PCH/objc_literals.m b/test/PCH/objc_literals.m
index f96d4af..f65bbe7 100644
--- a/test/PCH/objc_literals.m
+++ b/test/PCH/objc_literals.m
@@ -39,7 +39,7 @@ typedef unsigned char BOOL;
 + (id)dictionaryWithObjects:(const id [])objects forKeys:(const id [])keys count:(unsigned long)cnt;
 @end
 
-// CHECK-IR: define internal void @test_numeric_literals()
+// CHECK-IR: define internal {{.*}}void @test_numeric_literals()
 static inline void test_numeric_literals() {
   // CHECK-PRINT: id intlit = @17
   // CHECK-IR: {{call.*17}}
diff --git a/test/PCH/objc_literals.mm b/test/PCH/objc_literals.mm
index 777046e..7baf4a8 100644
--- a/test/PCH/objc_literals.mm
+++ b/test/PCH/objc_literals.mm
@@ -50,7 +50,7 @@ pair<T, U> make_pair(const T& first, const U& second) {
   return { first, second };
 }
 
-// CHECK-IR: define linkonce_odr void @_Z29variadic_dictionary_expansionIJP8NSStringS1_EJP8NSNumberS3_EEvDp4pairIT_T0_E
+// CHECK-IR: define linkonce_odr {{.*}}void @_Z29variadic_dictionary_expansionIJP8NSStringS1_EJP8NSNumberS3_EEvDp4pairIT_T0_E
 template<typename ...Ts, typename ... Us>
 void variadic_dictionary_expansion(pair<Ts, Us>... key_values) {
   // CHECK-PRINT: id dict = @{ key_values.first : key_values.second... };
diff --git a/test/PCH/subscripting-literals.m b/test/PCH/subscripting-literals.m
index 1675373..725e580 100644
--- a/test/PCH/subscripting-literals.m
+++ b/test/PCH/subscripting-literals.m
@@ -30,6 +30,14 @@
 
 @class NSString;
 
+@interface NSValue
++ (NSValue *)valueWithBytes:(const void *)bytes objCType:(const char *)type;
+@end
+
+typedef struct __attribute__((objc_boxable)) _some_struct {
+  int dummy;
+} some_struct;
+
 id testArray(int idx, id p) {
   NSMutableArray *array;
   array[idx] = p;
@@ -44,4 +52,9 @@ void testDict(NSString *key, id newObject, id oldObject) {
   NSDictionary *dict = @{ key: newObject, key: oldObject };
 }
 
+void testBoxableValue() {
+  some_struct ss;
+  id value = @(ss);
+}
+
 #endif
diff --git a/test/Parser/cxx-class.cpp b/test/Parser/cxx-class.cpp
index 38eef17..9e907f1 100644
--- a/test/Parser/cxx-class.cpp
+++ b/test/Parser/cxx-class.cpp
@@ -24,6 +24,16 @@ public:
   ; // expected-warning{{extra ';' inside a class}}
 
   virtual int vf() const volatile = 0;
+
+  virtual int vf0() = 0l; // expected-error {{does not look like a pure-specifier}}
+  virtual int vf1() = 1; // expected-error {{does not look like a pure-specifier}}
+  virtual int vf2() = 00; // expected-error {{does not look like a pure-specifier}}
+  virtual int vf3() = 0x0; // expected-error {{does not look like a pure-specifier}}
+  virtual int vf4() = 0.0; // expected-error {{does not look like a pure-specifier}}
+  virtual int vf5(){0}; // expected-error +{{}} expected-warning {{unused}}
+  virtual int vf5a(){0;}; // function definition, expected-warning {{unused}}
+  virtual int vf6()(0); // expected-error +{{}} expected-note +{{}}
+  virtual int vf7() = { 0 }; // expected-error {{does not look like a pure-specifier}}
   
 private:
   int x,f(),y,g();
diff --git a/test/Parser/cxx-concept-declaration.cpp b/test/Parser/cxx-concept-declaration.cpp
new file mode 100644
index 0000000..591629c
--- /dev/null
+++ b/test/Parser/cxx-concept-declaration.cpp
@@ -0,0 +1,30 @@
+
+// Support parsing of function concepts and variable concepts
+
+// RUN:  %clang_cc1 -std=c++14 -fconcepts-ts -x c++ -verify %s
+
+template<typename T> concept bool C1 = true;
+
+template<typename T> concept bool C2() { return true; }
+
+template<typename T>
+struct A { typedef bool Boolean; };
+
+template<int N>
+A<void>::Boolean concept C3(!0);
+
+template<typename T, int = 0>
+concept auto C4(void) -> bool { return true; }
+
+constexpr int One = 1;
+
+template <typename>
+static concept decltype(!0) C5 { bool(One) };
+
+template<typename T> concept concept bool C6 = true; // expected-warning {{duplicate 'concept' declaration specifier}}
+
+template<typename T> concept concept bool C7() { return true; } // expected-warning {{duplicate 'concept' declaration specifier}}
+
+concept D1 = true; // expected-error {{C++ requires a type specifier for all declarations}}
+
+template<concept T> concept bool D2 = true; // expected-error {{unknown type name 'T'}}
diff --git a/test/Parser/cxx-concepts-ambig-constraint-expr.cpp b/test/Parser/cxx-concepts-ambig-constraint-expr.cpp
new file mode 100644
index 0000000..12ab338
--- /dev/null
+++ b/test/Parser/cxx-concepts-ambig-constraint-expr.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -std=c++14 -fconcepts-ts -x c++ %s -verify
+
+// Test parsing of constraint-expressions in cases where the grammar is
+// ambiguous with the expectation that the longest token sequence which matches
+// the syntax is consumed without backtracking.
+
+// type-specifier-seq in conversion-type-id
+template <typename T> requires (bool)&T::operator short
+unsigned int foo(); // expected-error {{C++ requires a type specifier for all declarations}}
+
+// type-specifier-seq in new-type-id
+template <typename T> requires (bool)sizeof new (T::f()) short
+unsigned int bar(); // expected-error {{C++ requires a type specifier for all declarations}}
+
+template<typename T> requires (bool)sizeof new (T::f()) unsigned // expected-error {{'struct' cannot be signed or unsigned}}
+struct X { }; // expected-error {{'X' cannot be defined in a type specifier}}
+
+// C-style cast
+// of function call on function-style cast
+template <typename T> requires (bool(T()))
+T (*fp)(); // expected-error {{use of undeclared identifier 'fp'}}
+
+// function-style cast
+// as the callee in a function call
+struct A {
+  static int t;
+  template <typename T> requires bool(T())
+  (A(T (&t))) { } // expected-error {{called object type 'bool' is not a function or function pointer}}
+};
diff --git a/test/Parser/cxx-concepts-requires-clause.cpp b/test/Parser/cxx-concepts-requires-clause.cpp
new file mode 100644
index 0000000..01893a9
--- /dev/null
+++ b/test/Parser/cxx-concepts-requires-clause.cpp
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -std=c++14 -fconcepts-ts -x c++ %s -verify
+// expected-no-diagnostics
+
+// Test parsing of the optional requires-clause in a template-declaration.
+
+template <typename T> requires true
+void foo() { }
+
+
+template <typename T> requires !0
+struct A {
+  void foo();
+  struct AA;
+  enum E : int;
+  static int x;
+
+  template <typename> requires true
+  void Mfoo();
+
+  template <typename> requires true
+  struct M;
+
+  template <typename> requires true
+  static int Mx;
+
+  template <typename TT> requires true
+  using MQ = M<TT>;
+};
+
+template <typename T> requires !0
+void A<T>::foo() { }
+
+template <typename T> requires !0
+struct A<T>::AA { };
+
+template <typename T> requires !0
+enum A<T>::E : int { E0 };
+
+template <typename T> requires !0
+int A<T>::x = 0;
+
+template <typename T> requires !0
+template <typename> requires true
+void A<T>::Mfoo() { }
+
+template <typename T> requires !0
+template <typename> requires true
+struct A<T>::M { };
+
+template <typename T> requires !0
+template <typename> requires true
+int A<T>::Mx = 0;
+
+
+template <typename T> requires true
+int x = 0;
+
+template <typename T> requires true
+using Q = A<T>;
+
+struct C {
+  template <typename> requires true
+  void Mfoo();
+
+  template <typename> requires true
+  struct M;
+
+  template <typename> requires true
+  static int Mx;
+
+  template <typename T> requires true
+  using MQ = M<T>;
+};
+
+template <typename> requires true
+void C::Mfoo() { }
+
+template <typename> requires true
+struct C::M { };
+
+template <typename> requires true
+int C::Mx = 0;
diff --git a/test/Parser/nullability.c b/test/Parser/nullability.c
index f2b6abf..7117bce 100644
--- a/test/Parser/nullability.c
+++ b/test/Parser/nullability.c
@@ -1,14 +1,14 @@
 // RUN: %clang_cc1 -fsyntax-only -std=c99 -Wno-nullability-declspec -pedantic %s -verify
 
-__nonnull int *ptr; // expected-warning{{type nullability specifier '__nonnull' is a Clang extension}}
+_Nonnull int *ptr; // expected-warning{{type nullability specifier '_Nonnull' is a Clang extension}}
 
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wnullability-extension"
-__nonnull int *ptr2; // no-warning
+_Nonnull int *ptr2; // no-warning
 #pragma clang diagnostic pop
 
-#if __has_feature(nullability)
-#  error Nullability should not be supported in C under -pedantic -std=c99
+#if !__has_feature(nullability)
+#  error Nullability should always be supported
 #endif
 
 #if !__has_extension(nullability)
diff --git a/test/Preprocessor/cxx_true.cpp b/test/Preprocessor/cxx_true.cpp
index 39cb349..f6dc459 100644
--- a/test/Preprocessor/cxx_true.cpp
+++ b/test/Preprocessor/cxx_true.cpp
@@ -1,15 +1,18 @@
-/* RUN: %clang_cc1 -E %s -x c++ | grep block_1
-   RUN: %clang_cc1 -E %s -x c++ | not grep block_2
-   RUN: %clang_cc1 -E %s -x c | not grep block
+/* RUN: %clang_cc1 -E %s -x c++ | FileCheck -check-prefix CPP %s
+   RUN: %clang_cc1 -E %s -x c | FileCheck -check-prefix C %s
    RUN: %clang_cc1 -E %s -x c++ -verify -Wundef
 */
 // expected-no-diagnostics
 
 #if true
-block_1
+// CPP: test block_1
+// C-NOT: test block_1
+test block_1
 #endif
 
 #if false
-block_2
+// CPP-NOT: test block_2
+// C-NOT: test block_2
+test block_2
 #endif
 
diff --git a/test/Preprocessor/predefined-nullability.c b/test/Preprocessor/predefined-nullability.c
new file mode 100644
index 0000000..d736afa
--- /dev/null
+++ b/test/Preprocessor/predefined-nullability.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 %s -E -dM -triple i386-apple-darwin10 -o - | FileCheck %s --check-prefix=CHECK-DARWIN
+
+// RUN: %clang_cc1 %s -E -dM -triple x86_64-unknown-linux -o - | FileCheck %s --check-prefix=CHECK-NONDARWIN
+
+
+// CHECK-DARWIN: #define __nonnull _Nonnull
+// CHECK-DARWIN: #define __null_unspecified _Null_unspecified
+// CHECK-DARWIN: #define __nullable _Nullable
+
+// CHECK-NONDARWIN-NOT: __nonnull
+// CHECK-NONDARWIN: #define __clang__
+// CHECK-NONDARWIN-NOT: __nonnull
diff --git a/test/Profile/cxx-lambda.cpp b/test/Profile/cxx-lambda.cpp
index 34e1857..a111f06 100644
--- a/test/Profile/cxx-lambda.cpp
+++ b/test/Profile/cxx-lambda.cpp
@@ -13,14 +13,14 @@
 // PGOGEN: @[[MAC:__llvm_profile_counters_main]] = private global [1 x i64] zeroinitializer
 // LMBGEN: @[[LFC:"__llvm_profile_counters_cxx-lambda.cpp:_ZZ7lambdasvENK3\$_0clEi"]] = private global [3 x i64] zeroinitializer
 
-// PGOGEN-LABEL: define void @_Z7lambdasv()
-// PGOUSE-LABEL: define void @_Z7lambdasv()
+// PGOGEN-LABEL: define {{.*}}void @_Z7lambdasv()
+// PGOUSE-LABEL: define {{.*}}void @_Z7lambdasv()
 // PGOGEN: store {{.*}} @[[LWC]], i64 0, i64 0
 void lambdas() {
   int i = 1;
 
-  // LMBGEN-LABEL: define internal{{( x86_thiscallcc)?( zeroext)?}} i1 @"_ZZ7lambdasvENK3$_0clEi"(
-  // LMBUSE-LABEL: define internal{{( x86_thiscallcc)?( zeroext)?}} i1 @"_ZZ7lambdasvENK3$_0clEi"(
+  // LMBGEN-LABEL: define internal{{( [0-9_a-z]*cc)?( zeroext)?}} i1 @"_ZZ7lambdasvENK3$_0clEi"(
+  // LMBUSE-LABEL: define internal{{( [0-9_a-z]*cc)?( zeroext)?}} i1 @"_ZZ7lambdasvENK3$_0clEi"(
   // LMBGEN: store {{.*}} @[[LFC]], i64 0, i64 0
   auto f = [&i](int k) {
     // LMBGEN: store {{.*}} @[[LFC]], i64 0, i64 1
diff --git a/test/Profile/cxx-rangefor.cpp b/test/Profile/cxx-rangefor.cpp
index f30cdc7..23be0f5 100644
--- a/test/Profile/cxx-rangefor.cpp
+++ b/test/Profile/cxx-rangefor.cpp
@@ -9,7 +9,7 @@
 
 // PGOGEN: @[[RFC:__llvm_profile_counters__Z9range_forv]] = private global [5 x i64] zeroinitializer
 
-// CHECK-LABEL: define void @_Z9range_forv()
+// CHECK-LABEL: define {{.*}}void @_Z9range_forv()
 // PGOGEN: store {{.*}} @[[RFC]], i64 0, i64 0
 void range_for() {
   int arr[] = {1, 2, 3, 4, 5};
diff --git a/test/Profile/cxx-templates.cpp b/test/Profile/cxx-templates.cpp
index ce5651a..ca0e861 100644
--- a/test/Profile/cxx-templates.cpp
+++ b/test/Profile/cxx-templates.cpp
@@ -13,10 +13,10 @@
 // T0GEN: @[[T0C:__llvm_profile_counters__Z4loopILj0EEvv]] = linkonce_odr hidden global [2 x i64] zeroinitializer
 // T100GEN: @[[T100C:__llvm_profile_counters__Z4loopILj100EEvv]] = linkonce_odr hidden global [2 x i64] zeroinitializer
 
-// T0GEN-LABEL: define linkonce_odr void @_Z4loopILj0EEvv()
-// T0USE-LABEL: define linkonce_odr void @_Z4loopILj0EEvv()
-// T100GEN-LABEL: define linkonce_odr void @_Z4loopILj100EEvv()
-// T100USE-LABEL: define linkonce_odr void @_Z4loopILj100EEvv()
+// T0GEN-LABEL: define linkonce_odr {{.*}}void @_Z4loopILj0EEvv()
+// T0USE-LABEL: define linkonce_odr {{.*}}void @_Z4loopILj0EEvv()
+// T100GEN-LABEL: define linkonce_odr {{.*}}void @_Z4loopILj100EEvv()
+// T100USE-LABEL: define linkonce_odr {{.*}}void @_Z4loopILj100EEvv()
 template <unsigned N> void loop() {
   // ALL-NOT: ret
   // T0GEN: store {{.*}} @[[T0C]], i64 0, i64 0
diff --git a/test/Profile/profile-does-not-exist.c b/test/Profile/profile-does-not-exist.c
index 89609bd..d45981f 100644
--- a/test/Profile/profile-does-not-exist.c
+++ b/test/Profile/profile-does-not-exist.c
@@ -1,4 +1,4 @@
 // RUN: not %clang_cc1 -emit-llvm %s -o - -fprofile-instr-use=%t.nonexistent.profdata 2>&1 | FileCheck %s
 
-// CHECK: error: Could not read profile:
+// CHECK: error: Could not read profile {{.*}}.nonexistent.profdata:
 // CHECK-NOT: Assertion failed
diff --git a/test/Sema/arm-microsoft-intrinsics.c b/test/Sema/arm-microsoft-intrinsics.c
new file mode 100644
index 0000000..0637d98
--- /dev/null
+++ b/test/Sema/arm-microsoft-intrinsics.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple armv7 -fms-extensions -fsyntax-only -ffreestanding -verify %s
+
+unsigned int test_MoveFromCoprocessor(const unsigned int value) {
+  return _MoveFromCoprocessor(value, 1, 2, 3, 4); // expected-error-re {{argument to {{.*}} must be a constant integer}}
+}
+
+void test_MoveToCoprocessor(const unsigned int value) {
+  _MoveToCoprocessor(1, 2, value, 3, 4, 5); // expected-error-re {{argument to {{.*}} must be a constant integer}}
+}
diff --git a/test/Sema/attr-malloc.c b/test/Sema/attr-malloc.c
index 5351d75..6af15d2 100644
--- a/test/Sema/attr-malloc.c
+++ b/test/Sema/attr-malloc.c
@@ -19,10 +19,10 @@ __attribute((malloc)) int (*g)(); // expected-warning{{attribute only applies to
 
 __attribute((malloc))
 void * xalloc(unsigned n) { return malloc(n); } // no-warning
-// RUN: grep 'define noalias .* @xalloc(' %t
+// RUN: grep 'define .*noalias .* @xalloc(' %t %t
 
 #define malloc_like __attribute((__malloc__))
 void * xalloc2(unsigned) malloc_like;
 void * xalloc2(unsigned n) { return malloc(n); }
-// RUN: grep 'define noalias .* @xalloc2(' %t
+// RUN: grep 'define .*noalias .* @xalloc2(' %t %t
 
diff --git a/test/Sema/builtin-cpu-supports.c b/test/Sema/builtin-cpu-supports.c
new file mode 100644
index 0000000..c537b41
--- /dev/null
+++ b/test/Sema/builtin-cpu-supports.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-pc-linux-gnu -verify %s
+// RUN: %clang_cc1 -fsyntax-only -triple powerpc64le-linux-gnu -verify %s
+
+extern void a(const char *);
+
+extern const char *str;
+
+int main() {
+#ifdef __x86_64__
+  if (__builtin_cpu_supports("ss")) // expected-error {{invalid cpu feature string}}
+    a("sse4.2");
+
+  if (__builtin_cpu_supports(str)) // expected-error {{expression is not a string literal}}
+    a(str);
+#else
+  if (__builtin_cpu_supports("vsx")) // expected-error {{use of unknown builtin}}
+    a("vsx");
+#endif
+
+  return 0;
+}
diff --git a/test/Sema/enable_if.c b/test/Sema/enable_if.c
index a3c4323..4644858 100644
--- a/test/Sema/enable_if.c
+++ b/test/Sema/enable_if.c
@@ -49,7 +49,7 @@ size_t strnlen(const char *s, size_t maxlen)  // expected-note{{candidate functi
   __attribute__((unavailable("'maxlen' is larger than the buffer size")));
 
 void test2(const char *s, int i) {
-// CHECK: define void @test2
+// CHECK: define {{.*}}void @test2
   const char c[123];
   strnlen(s, i);
 // CHECK: call {{.*}}strnlen_real1
diff --git a/test/Sema/non-null-warning.c b/test/Sema/non-null-warning.c
index 6cd98e2..024ef1e 100644
--- a/test/Sema/non-null-warning.c
+++ b/test/Sema/non-null-warning.c
@@ -7,29 +7,29 @@
 #endif
 
 
-int * __nullable foo(int * __nonnull x);
+int * _Nullable foo(int * _Nonnull x);
 
-int *__nonnull ret_nonnull();
+int *_Nonnull ret_nonnull();
 
 int *foo(int *x) {
   return 0;
 }
 
-int * __nullable foo1(int * __nonnull x); // expected-note {{previous declaration is here}}
+int * _Nullable foo1(int * _Nonnull x); // expected-note {{previous declaration is here}}
 
-int *foo1(int * __nullable x) { // expected-warning {{nullability specifier '__nullable' conflicts with existing specifier '__nonnull'}}
+int *foo1(int * _Nullable x) { // expected-warning {{nullability specifier '_Nullable' conflicts with existing specifier '_Nonnull'}}
   return 0;
 }
 
-int * __nullable foo2(int * __nonnull x);
+int * _Nullable foo2(int * _Nonnull x);
 
-int *foo2(int * __nonnull x) {
+int *foo2(int * _Nonnull x) {
   return 0;
 }
 
-int * __nullable foo3(int * __nullable x); // expected-note {{previous declaration is here}}
+int * _Nullable foo3(int * _Nullable x); // expected-note {{previous declaration is here}}
 
-int *foo3(int * __nonnull x) { // expected-warning {{nullability specifier '__nonnull' conflicts with existing specifier '__nullable'}}
+int *foo3(int * _Nonnull x) { // expected-warning {{nullability specifier '_Nonnull' conflicts with existing specifier '_Nullable'}}
   return 0;
 }
 
diff --git a/test/Sema/nullability.c b/test/Sema/nullability.c
index 6144b7e..59644c4 100644
--- a/test/Sema/nullability.c
+++ b/test/Sema/nullability.c
@@ -8,88 +8,88 @@
 typedef int * int_ptr;
 
 // Parse nullability type specifiers.
-typedef int * __nonnull nonnull_int_ptr; // expected-note{{'__nonnull' specified here}}
-typedef int * __nullable nullable_int_ptr;
-typedef int * __null_unspecified null_unspecified_int_ptr;
+typedef int * _Nonnull nonnull_int_ptr; // expected-note{{'_Nonnull' specified here}}
+typedef int * _Nullable nullable_int_ptr;
+typedef int * _Null_unspecified null_unspecified_int_ptr;
 
 // Redundant nullability type specifiers.
-typedef int * __nonnull __nonnull redundant_1; // expected-warning{{duplicate nullability specifier '__nonnull'}}
+typedef int * _Nonnull _Nonnull redundant_1; // expected-warning{{duplicate nullability specifier '_Nonnull'}}
 
 // Conflicting nullability type specifiers.
-typedef int * __nonnull __nullable conflicting_1; // expected-error{{nullability specifier '__nonnull' conflicts with existing specifier '__nullable'}}
-typedef int * __null_unspecified __nonnull conflicting_2; // expected-error{{nullability specifier '__null_unspecified' conflicts with existing specifier '__nonnull'}}
+typedef int * _Nonnull _Nullable conflicting_1; // expected-error{{nullability specifier '_Nonnull' conflicts with existing specifier '_Nullable'}}
+typedef int * _Null_unspecified _Nonnull conflicting_2; // expected-error{{nullability specifier '_Null_unspecified' conflicts with existing specifier '_Nonnull'}}
 
 // Redundant nullability specifiers via a typedef are okay.
-typedef nonnull_int_ptr __nonnull redundant_okay_1;
+typedef nonnull_int_ptr _Nonnull redundant_okay_1;
 
 // Conflicting nullability specifiers via a typedef are not.
-typedef nonnull_int_ptr __nullable conflicting_2; // expected-error{{nullability specifier '__nullable' conflicts with existing specifier '__nonnull'}}
+typedef nonnull_int_ptr _Nullable conflicting_2; // expected-error{{nullability specifier '_Nullable' conflicts with existing specifier '_Nonnull'}}
 typedef nonnull_int_ptr nonnull_int_ptr_typedef;
-typedef nonnull_int_ptr_typedef __nullable conflicting_2; // expected-error{{nullability specifier '__nullable' conflicts with existing specifier '__nonnull'}}
+typedef nonnull_int_ptr_typedef _Nullable conflicting_2; // expected-error{{nullability specifier '_Nullable' conflicts with existing specifier '_Nonnull'}}
 typedef nonnull_int_ptr_typedef nonnull_int_ptr_typedef_typedef;
-typedef nonnull_int_ptr_typedef_typedef __null_unspecified conflicting_3; // expected-error{{nullability specifier '__null_unspecified' conflicts with existing specifier '__nonnull'}}
+typedef nonnull_int_ptr_typedef_typedef _Null_unspecified conflicting_3; // expected-error{{nullability specifier '_Null_unspecified' conflicts with existing specifier '_Nonnull'}}
 
 // Nullability applies to all pointer types.
-typedef int (* __nonnull function_pointer_type_1)(int, int);
-typedef int (^ __nonnull block_type_1)(int, int);
+typedef int (* _Nonnull function_pointer_type_1)(int, int);
+typedef int (^ _Nonnull block_type_1)(int, int);
 
 // Nullability must be on a pointer type.
-typedef int __nonnull int_type_1; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'int'}}
+typedef int _Nonnull int_type_1; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'}}
 
 // Nullability can move out to a pointer/block pointer declarator
 // (with a suppressed warning).
-typedef __nonnull int * nonnull_int_ptr_2;
-typedef int __nullable * nullable_int_ptr_2;
-typedef __nonnull int (* function_pointer_type_2)(int, int);
-typedef __nonnull int (^ block_type_2)(int, int);
-typedef __nonnull int * * __nullable nonnull_int_ptr_ptr_1;
-typedef __nonnull int *(^ block_type_3)(int, int);
-typedef __nonnull int *(* function_pointer_type_3)(int, int);
-typedef __nonnull int_ptr (^ block_type_4)(int, int);
-typedef __nonnull int_ptr (* function_pointer_type_4)(int, int);
-
-void acceptFunctionPtr(__nonnull int *(*)(void));
-void acceptBlockPtr(__nonnull int *(^)(void));
+typedef _Nonnull int * nonnull_int_ptr_2;
+typedef int _Nullable * nullable_int_ptr_2;
+typedef _Nonnull int (* function_pointer_type_2)(int, int);
+typedef _Nonnull int (^ block_type_2)(int, int);
+typedef _Nonnull int * * _Nullable nonnull_int_ptr_ptr_1;
+typedef _Nonnull int *(^ block_type_3)(int, int);
+typedef _Nonnull int *(* function_pointer_type_3)(int, int);
+typedef _Nonnull int_ptr (^ block_type_4)(int, int);
+typedef _Nonnull int_ptr (* function_pointer_type_4)(int, int);
+
+void acceptFunctionPtr(_Nonnull int *(*)(void));
+void acceptBlockPtr(_Nonnull int *(^)(void));
 
 void testBlockFunctionPtrNullability() {
   float *fp;
-  fp = (function_pointer_type_3)0; // expected-warning{{from 'function_pointer_type_3' (aka 'int * __nonnull (*)(int, int)')}}
-  fp = (block_type_3)0; // expected-error{{from incompatible type 'block_type_3' (aka 'int * __nonnull (^)(int, int)')}}
-  fp = (function_pointer_type_4)0; // expected-warning{{from 'function_pointer_type_4' (aka 'int_ptr  __nonnull (*)(int, int)')}}
-  fp = (block_type_4)0; // expected-error{{from incompatible type 'block_type_4' (aka 'int_ptr  __nonnull (^)(int, int)')}}
+  fp = (function_pointer_type_3)0; // expected-warning{{from 'function_pointer_type_3' (aka 'int * _Nonnull (*)(int, int)')}}
+  fp = (block_type_3)0; // expected-error{{from incompatible type 'block_type_3' (aka 'int * _Nonnull (^)(int, int)')}}
+  fp = (function_pointer_type_4)0; // expected-warning{{from 'function_pointer_type_4' (aka 'int_ptr  _Nonnull (*)(int, int)')}}
+  fp = (block_type_4)0; // expected-error{{from incompatible type 'block_type_4' (aka 'int_ptr  _Nonnull (^)(int, int)')}}
 
   acceptFunctionPtr(0); // no-warning
   acceptBlockPtr(0); // no-warning
 }
 
 // Moving nullability where it creates a conflict.
-typedef __nonnull int * __nullable *  conflict_int_ptr_ptr_2; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'int'}}
+typedef _Nonnull int * _Nullable *  conflict_int_ptr_ptr_2; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'}}
 
 // Nullability is not part of the canonical type.
-typedef int * __nonnull ambiguous_int_ptr;
+typedef int * _Nonnull ambiguous_int_ptr;
 typedef int * ambiguous_int_ptr;
-typedef int * __nullable ambiguous_int_ptr;
+typedef int * _Nullable ambiguous_int_ptr;
 
 // Printing of nullability.
 float f;
-int * __nonnull ip_1 = &f; // expected-warning{{incompatible pointer types initializing 'int * __nonnull' with an expression of type 'float *'}}
+int * _Nonnull ip_1 = &f; // expected-warning{{incompatible pointer types initializing 'int * _Nonnull' with an expression of type 'float *'}}
 
 // Check printing of nullability specifiers.
 void printing_nullability(void) {
-  int * __nonnull iptr;
-  float *fptr = iptr; // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'int * __nonnull'}}
+  int * _Nonnull iptr;
+  float *fptr = iptr; // expected-warning{{incompatible pointer types initializing 'float *' with an expression of type 'int * _Nonnull'}}
 
-  int * * __nonnull iptrptr;
-  float **fptrptr = iptrptr; // expected-warning{{incompatible pointer types initializing 'float **' with an expression of type 'int ** __nonnull'}}
+  int * * _Nonnull iptrptr;
+  float **fptrptr = iptrptr; // expected-warning{{incompatible pointer types initializing 'float **' with an expression of type 'int ** _Nonnull'}}
 
-  int * __nullable * __nonnull iptrptr2;
-  float * *fptrptr2 = iptrptr2; // expected-warning{{incompatible pointer types initializing 'float **' with an expression of type 'int * __nullable * __nonnull'}}
+  int * _Nullable * _Nonnull iptrptr2;
+  float * *fptrptr2 = iptrptr2; // expected-warning{{incompatible pointer types initializing 'float **' with an expression of type 'int * _Nullable * _Nonnull'}}
 }
 
-// Check passing null to a __nonnull argument.
-void accepts_nonnull_1(__nonnull int *ptr);
-void (*accepts_nonnull_2)(__nonnull int *ptr);
-void (^accepts_nonnull_3)(__nonnull int *ptr);
+// Check passing null to a _Nonnull argument.
+void accepts_nonnull_1(_Nonnull int *ptr);
+void (*accepts_nonnull_2)(_Nonnull int *ptr);
+void (^accepts_nonnull_3)(_Nonnull int *ptr);
 
 void test_accepts_nonnull_null_pointer_literal() {
   accepts_nonnull_1(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
@@ -97,17 +97,17 @@ void test_accepts_nonnull_null_pointer_literal() {
   accepts_nonnull_3(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
 }
 
-// Check returning nil from a __nonnull-returning function.
-__nonnull int *returns_int_ptr(int x) {
+// Check returning nil from a _Nonnull-returning function.
+_Nonnull int *returns_int_ptr(int x) {
   if (x) {
     return 0; // expected-warning{{null returned from function that requires a non-null return value}}
   }
 
-  return (__nonnull int *)0;
+  return (_Nonnull int *)0;
 }
 
 // Check nullable-to-nonnull conversions.
-void nullable_to_nonnull(__nullable int *ptr) {
+void nullable_to_nonnull(_Nullable int *ptr) {
   int *a = ptr; // okay
-  __nonnull int *b = ptr; // expected-warning{{implicit conversion from nullable pointer 'int * __nullable' to non-nullable pointer type 'int * __nonnull'}}
+  _Nonnull int *b = ptr; // expected-warning{{implicit conversion from nullable pointer 'int * _Nullable' to non-nullable pointer type 'int * _Nonnull'}}
 }
diff --git a/test/Sema/typo-correction.c b/test/Sema/typo-correction.c
index d457257..ff43064 100644
--- a/test/Sema/typo-correction.c
+++ b/test/Sema/typo-correction.c
@@ -40,3 +40,12 @@ int PR23101(__m128i __x) {
   return foo((__v2di)__x);  // expected-warning {{implicit declaration of function 'foo'}} \
                             // expected-error {{use of undeclared identifier '__v2di'}}
 }
+
+void f(long *a, long b) {
+      __atomic_or_fetch(a, b, c);  // expected-error {{use of undeclared identifier 'c'}}
+}
+
+extern double cabs(_Complex double z);
+void fn1() {
+  cabs(errij);  // expected-error {{use of undeclared identifier 'errij'}}
+}
diff --git a/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp b/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp
index a78f022..9456dd7 100644
--- a/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp
+++ b/test/SemaCXX/cxx0x-initializer-stdinitializerlist.cpp
@@ -275,3 +275,10 @@ namespace TemporaryInitListSourceRange_PR22367 {
       {0}
       );
 }
+
+namespace ParameterPackNestedInitializerLists_PR23904c3 {
+  template <typename ...T>
+  void f(std::initializer_list<std::initializer_list<T>> ...tt);
+
+  void foo() { f({{0}}, {{'\0'}}); }
+}
diff --git a/test/SemaCXX/cxx1y-generic-lambdas.cpp b/test/SemaCXX/cxx1y-generic-lambdas.cpp
index f4c67fb..b49a641 100644
--- a/test/SemaCXX/cxx1y-generic-lambdas.cpp
+++ b/test/SemaCXX/cxx1y-generic-lambdas.cpp
@@ -933,3 +933,18 @@ namespace PR22117 {
     };
   }(0)(0);
 }
+
+namespace PR23716 {
+template<typename T>
+auto f(T x) {
+  auto g = [](auto&&... args) {
+    auto h = [args...]() -> int {
+      return 0;
+    };
+    return h;
+  };
+  return g;
+}
+
+auto x = f(0)();
+}
diff --git a/test/SemaCXX/nullability-declspec.cpp b/test/SemaCXX/nullability-declspec.cpp
index ef1a171..1a3a321 100644
--- a/test/SemaCXX/nullability-declspec.cpp
+++ b/test/SemaCXX/nullability-declspec.cpp
@@ -2,8 +2,8 @@
 
 struct X { };
 
-__nullable int *ip1; // expected-error{{nullability specifier '__nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the pointer?}}
-__nullable int (*fp1)(int); // expected-error{{nullability specifier '__nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the function pointer?}}
-__nonnull int (^bp1)(int); // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the block pointer?}}
-__nonnull int X::*pmd1; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the member pointer?}}
-__nonnull int (X::*pmf1)(int); // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the member function pointer?}}
+_Nullable int *ip1; // expected-error{{nullability specifier '_Nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the pointer?}}
+_Nullable int (*fp1)(int); // expected-error{{nullability specifier '_Nullable' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the function pointer?}}
+_Nonnull int (^bp1)(int); // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the block pointer?}}
+_Nonnull int X::*pmd1; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the member pointer?}}
+_Nonnull int (X::*pmf1)(int); // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'; did you mean to apply the specifier to the member function pointer?}}
diff --git a/test/SemaCXX/nullability.cpp b/test/SemaCXX/nullability.cpp
index f0aa13b..edce6b0 100644
--- a/test/SemaCXX/nullability.cpp
+++ b/test/SemaCXX/nullability.cpp
@@ -1,29 +1,34 @@
 // RUN: %clang_cc1 -std=c++11 -fsyntax-only -Wno-nullability-declspec %s -verify
 
+#if __has_feature(nullability)
+#else
+#  error nullability feature should be defined
+#endif
+
 typedef decltype(nullptr) nullptr_t;
 
 class X {
 };
 
 // Nullability applies to all pointer types.
-typedef int (X::* __nonnull member_function_type_1)(int);
-typedef int X::* __nonnull member_data_type_1;
-typedef nullptr_t __nonnull nonnull_nullptr_t; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'nullptr_t'}}
+typedef int (X::* _Nonnull member_function_type_1)(int);
+typedef int X::* _Nonnull member_data_type_1;
+typedef nullptr_t _Nonnull nonnull_nullptr_t; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'nullptr_t'}}
 
 // Nullability can move into member pointers (this is suppressing a warning).
-typedef __nonnull int (X::* member_function_type_2)(int);
-typedef int (X::* __nonnull member_function_type_3)(int);
-typedef __nonnull int X::* member_data_type_2;
+typedef _Nonnull int (X::* member_function_type_2)(int);
+typedef int (X::* _Nonnull member_function_type_3)(int);
+typedef _Nonnull int X::* member_data_type_2;
 
 // Adding non-null via a template.
 template<typename T>
 struct AddNonNull {
-  typedef __nonnull T type; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'int'}}
-  // expected-error@-1{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'nullptr_t'}}
+  typedef _Nonnull T type; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'}}
+  // expected-error@-1{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'nullptr_t'}}
 };
 
 typedef AddNonNull<int *>::type nonnull_int_ptr_1;
-typedef AddNonNull<int * __nullable>::type nonnull_int_ptr_2; // FIXME: check that it was overridden
+typedef AddNonNull<int * _Nullable>::type nonnull_int_ptr_2; // FIXME: check that it was overridden
 typedef AddNonNull<nullptr_t>::type nonnull_int_ptr_3; // expected-note{{in instantiation of template class}}
 
 typedef AddNonNull<int>::type nonnull_non_pointer_1; // expected-note{{in instantiation of template class 'AddNonNull<int>' requested here}}
@@ -31,22 +36,22 @@ typedef AddNonNull<int>::type nonnull_non_pointer_1; // expected-note{{in instan
 // Non-null checking within a template.
 template<typename T>
 struct AddNonNull2 {
-  typedef __nonnull AddNonNull<T> invalid1; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'AddNonNull<T>'}}
-  typedef __nonnull AddNonNull2 invalid2; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'AddNonNull2<T>'}}
-  typedef __nonnull AddNonNull2<T> invalid3; // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'AddNonNull2<T>'}}
-  typedef __nonnull typename AddNonNull<T>::type okay1;
+  typedef _Nonnull AddNonNull<T> invalid1; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'AddNonNull<T>'}}
+  typedef _Nonnull AddNonNull2 invalid2; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'AddNonNull2<T>'}}
+  typedef _Nonnull AddNonNull2<T> invalid3; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'AddNonNull2<T>'}}
+  typedef _Nonnull typename AddNonNull<T>::type okay1;
 
   // Don't move past a dependent type even if we know that nullability
   // cannot apply to that specific dependent type.
-  typedef __nonnull AddNonNull<T> (*invalid4); // expected-error{{nullability specifier '__nonnull' cannot be applied to non-pointer type 'AddNonNull<T>'}}
+  typedef _Nonnull AddNonNull<T> (*invalid4); // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'AddNonNull<T>'}}
 };
 
-// Check passing null to a __nonnull argument.
-void (*accepts_nonnull_1)(__nonnull int *ptr);
-void (*& accepts_nonnull_2)(__nonnull int *ptr) = accepts_nonnull_1;
-void (X::* accepts_nonnull_3)(__nonnull int *ptr);
-void accepts_nonnull_4(__nonnull int *ptr);
-void (&accepts_nonnull_5)(__nonnull int *ptr) = accepts_nonnull_4;
+// Check passing null to a _Nonnull argument.
+void (*accepts_nonnull_1)(_Nonnull int *ptr);
+void (*& accepts_nonnull_2)(_Nonnull int *ptr) = accepts_nonnull_1;
+void (X::* accepts_nonnull_3)(_Nonnull int *ptr);
+void accepts_nonnull_4(_Nonnull int *ptr);
+void (&accepts_nonnull_5)(_Nonnull int *ptr) = accepts_nonnull_4;
 
 void test_accepts_nonnull_null_pointer_literal(X *x) {
   accepts_nonnull_1(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
@@ -56,7 +61,7 @@ void test_accepts_nonnull_null_pointer_literal(X *x) {
   accepts_nonnull_5(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
 }
 
-template<void FP(__nonnull int*)> 
+template<void FP(_Nonnull int*)> 
 void test_accepts_nonnull_null_pointer_literal_template() {
   FP(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
 }
diff --git a/test/SemaCXX/openmp_default_simd_align.cpp b/test/SemaCXX/openmp_default_simd_align.cpp
new file mode 100644
index 0000000..c869a94
--- /dev/null
+++ b/test/SemaCXX/openmp_default_simd_align.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -triple x86_64-unknown-unknown -verify %s
+
+struct S0 {
+  int x;
+  static const int test0 = __builtin_omp_required_simd_align(x); // expected-error {{invalid application of '__builtin_omp_required_simd_align' to an expression, only type is allowed}}
+  static const int test1 = __builtin_omp_required_simd_align(decltype(S0::x));
+  auto test2() -> char(&)[__builtin_omp_required_simd_align(decltype(x))];
+};
+
+struct S1; // expected-note 6 {{forward declaration}}
+extern S1 s1;
+const int test3 = __builtin_omp_required_simd_align(decltype(s1)); // expected-error {{invalid application of '__builtin_omp_required_simd_align' to an incomplete type 'decltype(s1)' (aka 'S1')}}
+
+struct S2 {
+  S2();
+  S1 &s;
+  int x;
+
+  int test4 = __builtin_omp_required_simd_align(decltype(x)); // ok
+  int test5 = __builtin_omp_required_simd_align(decltype(s)); // expected-error {{invalid application of '__builtin_omp_required_simd_align' to an incomplete type 'S1'}}
+};
+
+const int test6 = __builtin_omp_required_simd_align(decltype(S2::x));
+const int test7 = __builtin_omp_required_simd_align(decltype(S2::s)); // expected-error {{invalid application of '__builtin_omp_required_simd_align' to an incomplete type 'S1'}}
+
+// Arguably, these should fail like the S1 cases do: the alignment of
+// 's2.x' should depend on the alignment of both x-within-S2 and
+// s2-within-S3 and thus require 'S3' to be complete.  If we start
+// doing the appropriate recursive walk to do that, we should make
+// sure that these cases don't explode.
+struct S3 {
+  S2 s2;
+
+  static const int test8 = __builtin_omp_required_simd_align(decltype(s2.x));
+  static const int test9 = __builtin_omp_required_simd_align(decltype(s2.s)); // expected-error {{invalid application of '__builtin_omp_required_simd_align' to an incomplete type 'S1'}}
+  auto test10() -> char(&)[__builtin_omp_required_simd_align(decltype(s2.x))];
+  static const int test11 = __builtin_omp_required_simd_align(decltype(S3::s2.x));
+  static const int test12 = __builtin_omp_required_simd_align(decltype(S3::s2.s)); // expected-error {{invalid application of '__builtin_omp_required_simd_align' to an incomplete type 'S1'}}
+  auto test13() -> char(&)[__builtin_omp_required_simd_align(decltype(s2.x))];
+};
+
+// Same reasoning as S3.
+struct S4 {
+  union {
+      int x;
+    };
+  static const int test0 = __builtin_omp_required_simd_align(decltype(x));
+  static const int test1 = __builtin_omp_required_simd_align(decltype(S0::x));
+  auto test2() -> char(&)[__builtin_omp_required_simd_align(decltype(x))];
+};
+
+// Regression test for asking for the alignment of a field within an invalid
+// record.
+struct S5 {
+  S1 s;  // expected-error {{incomplete type}}
+  int x;
+};
+const int test8 = __builtin_omp_required_simd_align(decltype(S5::x));
+
+long long int test14[2];
+
+static_assert(__builtin_omp_required_simd_align(decltype(test14)) == 16, "foo");
+
+static_assert(__builtin_omp_required_simd_align(int[2]) == __builtin_omp_required_simd_align(int), ""); // ok
+
+namespace __builtin_omp_required_simd_align_array_expr {
+  alignas(32) extern int n[2];
+  static_assert(__builtin_omp_required_simd_align(decltype(n)) == 16, "");
+
+  template<int> struct S {
+      static int a[];
+    };
+  template<int N> int S<N>::a[N];
+  static_assert(__builtin_omp_required_simd_align(decltype(S<1>::a)) == __builtin_omp_required_simd_align(int), "");
+  static_assert(__builtin_omp_required_simd_align(decltype(S<1128>::a)) == __builtin_omp_required_simd_align(int), "");
+}
+
+template <typename T> void n(T) {
+  alignas(T) int T1;
+  char k[__builtin_omp_required_simd_align(decltype(T1))];
+  static_assert(sizeof(k) == __builtin_omp_required_simd_align(long long), "");
+}
+template void n(long long);
diff --git a/test/SemaCXX/typo-correction-cxx11.cpp b/test/SemaCXX/typo-correction-cxx11.cpp
index 99cb166..8c58820 100644
--- a/test/SemaCXX/typo-correction-cxx11.cpp
+++ b/test/SemaCXX/typo-correction-cxx11.cpp
@@ -32,3 +32,29 @@ void f() {
   }
 }
 }
+
+namespace NewTypoExprFromResolvingTypoAmbiguity {
+struct A {
+  void Swap(A *other);
+};
+
+struct pair {
+  int first;
+  A *second;
+};
+
+struct map {
+public:
+  void swap(map &x);
+  pair find(int x);
+};
+
+void run(A *annotations) {
+  map new_annotations;
+
+  auto &annotation = *annotations;
+  auto new_it = new_annotations.find(5);
+  auto &new_anotation = new_it.second;  // expected-note {{'new_anotation' declared here}}
+  new_annotation->Swap(&annotation);  // expected-error {{use of undeclared identifier 'new_annotation'; did you mean 'new_anotation'?}}
+}
+}
diff --git a/test/SemaCXX/virtual-function-in-union.cpp b/test/SemaCXX/virtual-function-in-union.cpp
new file mode 100644
index 0000000..0c4ba5d
--- /dev/null
+++ b/test/SemaCXX/virtual-function-in-union.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+union x {
+  virtual void f(); // expected-error {{unions cannot have virtual functions}}
+};
diff --git a/test/SemaCXX/virtuals.cpp b/test/SemaCXX/virtuals.cpp
index 6b8231d..f818074 100644
--- a/test/SemaCXX/virtuals.cpp
+++ b/test/SemaCXX/virtuals.cpp
@@ -51,3 +51,13 @@ namespace pr8264 {
     virtual virtual void func();  // expected-warning {{duplicate 'virtual' declaration specifier}}
   };
 }
+
+namespace VirtualFriend {
+  // DR (filed but no number yet): reject meaningless pure-specifier on a friend declaration.
+  struct A { virtual int f(); };
+  struct B { friend int A::f() = 0; }; // expected-error {{friend declaration cannot have a pure-specifier}}
+  struct C {
+    virtual int f();
+    friend int C::f() = 0; // expected-error {{friend declaration cannot have a pure-specifier}}
+  };
+}
diff --git a/test/SemaObjC/arc-unavailable-for-weakref.m b/test/SemaObjC/arc-unavailable-for-weakref.m
index 53ceaa1..35d5d98 100644
--- a/test/SemaObjC/arc-unavailable-for-weakref.m
+++ b/test/SemaObjC/arc-unavailable-for-weakref.m
@@ -56,7 +56,7 @@ __attribute__((objc_arc_weak_reference_unavailable))
 @interface I
 {
 }
-@property (weak) NSFont *font; // expected-error {{synthesizing __weak instance variable of type 'NSFont * __nullable', which does not support weak references}}
+@property (weak) NSFont *font; // expected-error {{synthesizing __weak instance variable of type 'NSFont * _Nullable', which does not support weak references}}
 @end
 
 @implementation I // expected-note {{when implemented by class I}}
@@ -65,7 +65,7 @@ __attribute__((objc_arc_weak_reference_unavailable))
 
 // rdar://13676793
 @protocol MyProtocol
-@property (weak) NSFont *font; // expected-error {{synthesizing __weak instance variable of type 'NSFont * __nullable', which does not support weak references}}
+@property (weak) NSFont *font; // expected-error {{synthesizing __weak instance variable of type 'NSFont * _Nullable', which does not support weak references}}
 @end
 
 @interface I1 <MyProtocol>
@@ -76,7 +76,7 @@ __attribute__((objc_arc_weak_reference_unavailable))
 @end
 
 @interface Super
-@property (weak) NSFont *font;  // expected-error {{synthesizing __weak instance variable of type 'NSFont * __nullable', which does not support weak references}}
+@property (weak) NSFont *font;  // expected-error {{synthesizing __weak instance variable of type 'NSFont * _Nullable', which does not support weak references}}
 @end
 
 
diff --git a/test/SemaObjC/format-strings-objc.m b/test/SemaObjC/format-strings-objc.m
index f4c528c..079460c 100644
--- a/test/SemaObjC/format-strings-objc.m
+++ b/test/SemaObjC/format-strings-objc.m
@@ -251,3 +251,16 @@ void testUnicode() {
   NSLog(@"%C", 0x202200); // expected-warning{{format specifies type 'unichar' (aka 'unsigned short') but the argument has type 'int'}}
 }
 
+// Test Objective-C modifier flags.
+void testObjCModifierFlags() {
+  NSLog(@"%[]@", @"Foo"); // expected-warning {{missing object format flag}}
+  NSLog(@"%[", @"Foo"); // expected-warning {{incomplete format specifier}}
+  NSLog(@"%[tt", @"Foo");  // expected-warning {{incomplete format specifier}}
+  NSLog(@"%[tt]@", @"Foo"); // no-warning
+  NSLog(@"%[tt]@ %s", @"Foo", "hello"); // no-warning
+  NSLog(@"%s %[tt]@", "hello", @"Foo"); // no-warning
+  NSLog(@"%[blark]@", @"Foo"); // expected-warning {{'blark' is not a valid object format flag}}
+  NSLog(@"%2$[tt]@ %1$[tt]@", @"Foo", @"Bar"); // no-warning
+  NSLog(@"%2$[tt]@ %1$[tt]s", @"Foo", @"Bar"); // expected-warning {{object format flags cannot be used with 's' conversion specifier}}
+}
+
diff --git a/test/SemaObjC/nullability-arc.m b/test/SemaObjC/nullability-arc.m
index 917a808..1c303e8 100644
--- a/test/SemaObjC/nullability-arc.m
+++ b/test/SemaObjC/nullability-arc.m
@@ -5,6 +5,6 @@ __attribute__((objc_root_class))
 @end
 
 // ARC qualifiers stacked with nullability.
-void accepts_arc_qualified(NSFoo * __unsafe_unretained __nonnull obj) {
+void accepts_arc_qualified(NSFoo * __unsafe_unretained _Nonnull obj) {
   accepts_arc_qualified(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
 }
diff --git a/test/SemaObjC/nullability.m b/test/SemaObjC/nullability.m
index ca8c2fc..36ac6b9 100644
--- a/test/SemaObjC/nullability.m
+++ b/test/SemaObjC/nullability.m
@@ -2,29 +2,29 @@
 
 __attribute__((objc_root_class))
 @interface NSFoo
-- (void)methodTakingIntPtr:(__nonnull int *)ptr;
-- (__nonnull int *)methodReturningIntPtr;
+- (void)methodTakingIntPtr:(_Nonnull int *)ptr;
+- (_Nonnull int *)methodReturningIntPtr;
 @end
 
 // Nullability applies to all pointer types.
-typedef NSFoo * __nonnull nonnull_NSFoo_ptr;
-typedef id __nonnull nonnull_id;
-typedef SEL __nonnull nonnull_SEL;
+typedef NSFoo * _Nonnull nonnull_NSFoo_ptr;
+typedef id _Nonnull nonnull_id;
+typedef SEL _Nonnull nonnull_SEL;
 
 // Nullability can move into Objective-C pointer types.
-typedef __nonnull NSFoo * nonnull_NSFoo_ptr_2;
+typedef _Nonnull NSFoo * nonnull_NSFoo_ptr_2;
 
 // Conflicts from nullability moving into Objective-C pointer type.
-typedef __nonnull NSFoo * __nullable conflict_NSFoo_ptr_2; // expected-error{{'__nonnull' cannot be applied to non-pointer type 'NSFoo'}}
+typedef _Nonnull NSFoo * _Nullable conflict_NSFoo_ptr_2; // expected-error{{'_Nonnull' cannot be applied to non-pointer type 'NSFoo'}}
 
-void testBlocksPrinting(NSFoo * __nullable (^bp)(int)) {
-  int *ip = bp; // expected-error{{'NSFoo * __nullable (^)(int)'}}
+void testBlocksPrinting(NSFoo * _Nullable (^bp)(int)) {
+  int *ip = bp; // expected-error{{'NSFoo * _Nullable (^)(int)'}}
 }
 
-// Check returning nil from a __nonnull-returning method.
+// Check returning nil from a _Nonnull-returning method.
 @implementation NSFoo
-- (void)methodTakingIntPtr:(__nonnull int *)ptr { }
-- (__nonnull int *)methodReturningIntPtr {
+- (void)methodTakingIntPtr:(_Nonnull int *)ptr { }
+- (_Nonnull int *)methodReturningIntPtr {
   return 0; // no warning
 }
 @end
@@ -35,15 +35,15 @@ __attribute__((objc_root_class))
 - (nonnull NSFoo *)methodWithFoo:(nonnull NSFoo *)foo;
 
 - (nonnull NSFoo **)invalidMethod1; // expected-error{{nullability keyword 'nonnull' cannot be applied to multi-level pointer type 'NSFoo **'}}
-// expected-note@-1{{use nullability type specifier '__nonnull' to affect the innermost pointer type of 'NSFoo **'}}
-- (nonnull NSFoo * __nullable)conflictingMethod1; // expected-error{{nullability specifier '__nullable' conflicts with existing specifier '__nonnull'}}
-- (nonnull NSFoo * __nonnull)redundantMethod1; // expected-warning{{duplicate nullability specifier '__nonnull'}}
+// expected-note@-1{{use nullability type specifier '_Nonnull' to affect the innermost pointer type of 'NSFoo **'}}
+- (nonnull NSFoo * _Nullable)conflictingMethod1; // expected-error{{nullability specifier '_Nullable' conflicts with existing specifier '_Nonnull'}}
+- (nonnull NSFoo * _Nonnull)redundantMethod1; // expected-warning{{duplicate nullability specifier '_Nonnull'}}
 
 @property(nonnull,retain) NSFoo *property1;
 @property(nullable,assign) NSFoo ** invalidProperty1; // expected-error{{nullability keyword 'nullable' cannot be applied to multi-level pointer type 'NSFoo **'}}
-// expected-note@-1{{use nullability type specifier '__nullable' to affect the innermost pointer type of 'NSFoo **'}}
-@property(null_unspecified,retain) NSFoo * __nullable conflictingProperty1; // expected-error{{nullability specifier '__nullable' conflicts with existing specifier '__null_unspecified'}}
-@property(retain,nonnull) NSFoo * __nonnull redundantProperty1; // expected-warning{{duplicate nullability specifier '__nonnull'}}
+// expected-note@-1{{use nullability type specifier '_Nullable' to affect the innermost pointer type of 'NSFoo **'}}
+@property(null_unspecified,retain) NSFoo * _Nullable conflictingProperty1; // expected-error{{nullability specifier '_Nullable' conflicts with existing specifier '_Null_unspecified'}}
+@property(retain,nonnull) NSFoo * _Nonnull redundantProperty1; // expected-warning{{duplicate nullability specifier '_Nonnull'}}
 
 @property(null_unspecified,retain,nullable) NSFoo *conflictingProperty3; // expected-error{{nullability specifier 'nullable' conflicts with existing specifier 'null_unspecified'}}
 @property(nullable,retain,nullable) NSFoo *redundantProperty3; // expected-warning{{duplicate nullability specifier 'nullable'}}
@@ -52,19 +52,19 @@ __attribute__((objc_root_class))
 @interface NSBar ()
 @property(nonnull,retain) NSFoo *property2;
 @property(nullable,assign) NSFoo ** invalidProperty2; // expected-error{{nullability keyword 'nullable' cannot be applied to multi-level pointer type 'NSFoo **'}}
-// expected-note@-1{{use nullability type specifier '__nullable' to affect the innermost pointer type of 'NSFoo **'}}
-@property(null_unspecified,retain) NSFoo * __nullable conflictingProperty2; // expected-error{{nullability specifier '__nullable' conflicts with existing specifier '__null_unspecified'}}
-@property(retain,nonnull) NSFoo * __nonnull redundantProperty2; // expected-warning{{duplicate nullability specifier '__nonnull'}}
+// expected-note@-1{{use nullability type specifier '_Nullable' to affect the innermost pointer type of 'NSFoo **'}}
+@property(null_unspecified,retain) NSFoo * _Nullable conflictingProperty2; // expected-error{{nullability specifier '_Nullable' conflicts with existing specifier '_Null_unspecified'}}
+@property(retain,nonnull) NSFoo * _Nonnull redundantProperty2; // expected-warning{{duplicate nullability specifier '_Nonnull'}}
 @end
 
-void test_accepts_nonnull_null_pointer_literal(NSFoo *foo, __nonnull NSBar *bar) {
+void test_accepts_nonnull_null_pointer_literal(NSFoo *foo, _Nonnull NSBar *bar) {
   [foo methodTakingIntPtr: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
   [bar methodWithFoo: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
   bar.property1 = 0; // expected-warning{{null passed to a callee that requires a non-null argument}}
   bar.property2 = 0; // expected-warning{{null passed to a callee that requires a non-null argument}}
   [bar setProperty1: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
   [bar setProperty2: 0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
-  int *ptr = bar.property1; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'NSFoo * __nonnull'}}
+  int *ptr = bar.property1; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'NSFoo * _Nonnull'}}
 }
 
 // Check returning nil from a nonnull-returning method.
@@ -82,7 +82,7 @@ void test_accepts_nonnull_null_pointer_literal(NSFoo *foo, __nonnull NSBar *bar)
 }
 - (NSFoo *)redundantMethod1 {
   int *ip = 0;
-  return ip; // expected-warning{{result type 'NSFoo * __nonnull'}}
+  return ip; // expected-warning{{result type 'NSFoo * _Nonnull'}}
 }
 @end
 
@@ -95,8 +95,8 @@ __attribute__((objc_root_class))
 
 @implementation NSMerge
 - (NSFoo *)methodA:(NSFoo*)foo {
-  int *ptr = foo; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'NSFoo * __nonnull'}}
-  return ptr; // expected-warning{{result type 'NSFoo * __nonnull'}}
+  int *ptr = foo; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'NSFoo * _Nonnull'}}
+  return ptr; // expected-warning{{result type 'NSFoo * _Nonnull'}}
 }
 
 - (nullable NSFoo *)methodB:(null_unspecified NSFoo*)foo { // expected-error{{nullability specifier 'nullable' conflicts with existing specifier 'nonnull'}} \
@@ -106,7 +106,7 @@ __attribute__((objc_root_class))
 
 - (nonnull NSFoo *)methodC:(nullable NSFoo*)foo {
   int *ip = 0;
-  return ip; // expected-warning{{result type 'NSFoo * __nonnull'}}
+  return ip; // expected-warning{{result type 'NSFoo * _Nonnull'}}
 }
 @end
 
@@ -119,27 +119,27 @@ __attribute__((objc_root_class))
 @end
 
 void test_receiver_merge(NSMergeReceiver *none,
-                         __nonnull NSMergeReceiver *nonnull,
-                         __nullable NSMergeReceiver *nullable,
-                         __null_unspecified NSMergeReceiver *null_unspecified) {
+                         _Nonnull NSMergeReceiver *nonnull,
+                         _Nullable NSMergeReceiver *nullable,
+                         _Null_unspecified NSMergeReceiver *null_unspecified) {
   int *ptr;
 
-  ptr = [nullable returnsNullable]; // expected-warning{{'id __nullable'}}
-  ptr = [nullable returnsNullUnspecified]; // expected-warning{{'id __nullable'}}
-  ptr = [nullable returnsNonNull]; // expected-warning{{'id __nullable'}}
-  ptr = [nullable returnsNone]; // expected-warning{{'id __nullable'}}
+  ptr = [nullable returnsNullable]; // expected-warning{{'id _Nullable'}}
+  ptr = [nullable returnsNullUnspecified]; // expected-warning{{'id _Nullable'}}
+  ptr = [nullable returnsNonNull]; // expected-warning{{'id _Nullable'}}
+  ptr = [nullable returnsNone]; // expected-warning{{'id _Nullable'}}
 
-  ptr = [null_unspecified returnsNullable]; // expected-warning{{'id __nullable'}}
-  ptr = [null_unspecified returnsNullUnspecified]; // expected-warning{{'id __null_unspecified'}}
-  ptr = [null_unspecified returnsNonNull]; // expected-warning{{'id __null_unspecified'}}
+  ptr = [null_unspecified returnsNullable]; // expected-warning{{'id _Nullable'}}
+  ptr = [null_unspecified returnsNullUnspecified]; // expected-warning{{'id _Null_unspecified'}}
+  ptr = [null_unspecified returnsNonNull]; // expected-warning{{'id _Null_unspecified'}}
   ptr = [null_unspecified returnsNone]; // expected-warning{{'id'}}
 
-  ptr = [nonnull returnsNullable]; // expected-warning{{'id __nullable'}}
-  ptr = [nonnull returnsNullUnspecified]; // expected-warning{{'id __null_unspecified'}}
-  ptr = [nonnull returnsNonNull]; // expected-warning{{'id __nonnull'}}
+  ptr = [nonnull returnsNullable]; // expected-warning{{'id _Nullable'}}
+  ptr = [nonnull returnsNullUnspecified]; // expected-warning{{'id _Null_unspecified'}}
+  ptr = [nonnull returnsNonNull]; // expected-warning{{'id _Nonnull'}}
   ptr = [nonnull returnsNone]; // expected-warning{{'id'}}
 
-  ptr = [none returnsNullable]; // expected-warning{{'id __nullable'}}
+  ptr = [none returnsNullable]; // expected-warning{{'id _Nullable'}}
   ptr = [none returnsNullUnspecified]; // expected-warning{{'id'}}
   ptr = [none returnsNonNull]; // expected-warning{{'id'}}
   ptr = [none returnsNone]; // expected-warning{{'id'}}
@@ -157,19 +157,19 @@ __attribute__((objc_root_class))
 - (nullable instancetype)returnMe;
 + (nullable instancetype)returnInstanceOfMe;
 
-- (nonnull instancetype __nullable)initWithBlah2:(nonnull id)blah; // expected-error {{nullability specifier '__nullable' conflicts with existing specifier '__nonnull'}}
-- (instancetype __nullable)returnMe2;
-+ (__nonnull instancetype)returnInstanceOfMe2;
+- (nonnull instancetype _Nullable)initWithBlah2:(nonnull id)blah; // expected-error {{nullability specifier '_Nullable' conflicts with existing specifier '_Nonnull'}}
+- (instancetype _Nullable)returnMe2;
++ (_Nonnull instancetype)returnInstanceOfMe2;
 @end
 
-void test_instancetype(InitializableClass * __nonnull ic, id __nonnull object) {
-  int *ip = [ic returnMe]; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'InitializableClass * __nullable'}}
-  ip = [InitializableClass returnMe]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'id __nullable'}}
-  ip = [InitializableClass returnInstanceOfMe]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'InitializableClass * __nullable'}}
-  ip = [object returnMe]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'id __nullable'}}
+void test_instancetype(InitializableClass * _Nonnull ic, id _Nonnull object) {
+  int *ip = [ic returnMe]; // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'InitializableClass * _Nullable'}}
+  ip = [InitializableClass returnMe]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'id _Nullable'}}
+  ip = [InitializableClass returnInstanceOfMe]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'InitializableClass * _Nullable'}}
+  ip = [object returnMe]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'id _Nullable'}}
 
-  ip = [ic returnMe2]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'InitializableClass * __nullable'}}
-  ip = [InitializableClass returnInstanceOfMe2]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'InitializableClass * __nonnull'}}
+  ip = [ic returnMe2]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'InitializableClass * _Nullable'}}
+  ip = [InitializableClass returnInstanceOfMe2]; // expected-warning{{incompatible pointer types assigning to 'int *' from 'InitializableClass * _Nonnull'}}
 }
 
 // Check null_resettable getters/setters.
@@ -184,14 +184,14 @@ __attribute__((objc_root_class))
 @end
 
 void test_null_resettable(NSResettable *r, int *ip) {
-  [r setResettable1:ip]; // expected-warning{{incompatible pointer types sending 'int *' to parameter of type 'NSResettable * __nullable'}}
-  r.resettable1 = ip; // expected-warning{{incompatible pointer types assigning to 'NSResettable * __nullable' from 'int *'}}
+  [r setResettable1:ip]; // expected-warning{{incompatible pointer types sending 'int *' to parameter of type 'NSResettable * _Nullable'}}
+  r.resettable1 = ip; // expected-warning{{incompatible pointer types assigning to 'NSResettable * _Nullable' from 'int *'}}
 }
 
 @implementation NSResettable // expected-warning{{synthesized setter 'setResettable4:' for null_resettable property 'resettable4' does not handle nil}}
 - (NSResettable *)resettable1 {
   int *ip = 0;
-  return ip; // expected-warning{{result type 'NSResettable * __nonnull'}}
+  return ip; // expected-warning{{result type 'NSResettable * _Nonnull'}}
 }
 
 - (void)setResettable1:(NSResettable *)param {
@@ -218,15 +218,15 @@ void test_null_resettable(NSResettable *r, int *ip) {
 
 void testMultiProp(MultiProp *foo) {
   int *ip;
-  ip = foo.a; // expected-warning{{from 'id __nullable'}}
-  ip = foo.d; // expected-warning{{from 'MultiProp * __nullable'}}
-  ip = foo.e; // expected-error{{incompatible type 'MultiProp *(^ __nullable)(int)'}}
+  ip = foo.a; // expected-warning{{from 'id _Nullable'}}
+  ip = foo.d; // expected-warning{{from 'MultiProp * _Nullable'}}
+  ip = foo.e; // expected-error{{incompatible type 'MultiProp *(^ _Nullable)(int)'}}
 }
 
 void testBlockLiterals() {
   (void)(^id(void) { return 0; });
-  (void)(^id __nullable (void) { return 0; });
-  (void)(^ __nullable id(void) { return 0; });
+  (void)(^id _Nullable (void) { return 0; });
+  (void)(^ _Nullable id(void) { return 0; });
 
-  int *x = (^ __nullable id(void) { return 0; })(); // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'id __nullable'}}
+  int *x = (^ _Nullable id(void) { return 0; })(); // expected-warning{{incompatible pointer types initializing 'int *' with an expression of type 'id _Nullable'}}
 }
diff --git a/test/SemaObjC/nullable-weak-property.m b/test/SemaObjC/nullable-weak-property.m
index 617ff4e..faafd20 100644
--- a/test/SemaObjC/nullable-weak-property.m
+++ b/test/SemaObjC/nullable-weak-property.m
@@ -5,7 +5,7 @@
 @interface NSObject @end
 
 @class NSFoo;
-void foo (NSFoo * __nonnull);
+void foo (NSFoo * _Nonnull);
 
 @interface NSBar : NSObject
 @property(weak) NSFoo *property1;
@@ -13,6 +13,6 @@ void foo (NSFoo * __nonnull);
 
 @implementation NSBar 
 - (void) Meth {
-   foo (self.property1); // expected-warning {{implicit conversion from nullable pointer 'NSFoo * __nullable' to non-nullable pointer type 'NSFoo * __nonnull'}}
+   foo (self.property1); // expected-warning {{implicit conversion from nullable pointer 'NSFoo * _Nullable' to non-nullable pointer type 'NSFoo * _Nonnull'}}
 }
 @end
diff --git a/test/SemaObjCXX/Inputs/nullability-consistency-1.h b/test/SemaObjCXX/Inputs/nullability-consistency-1.h
index 4d6bf79..6ab48fe 100644
--- a/test/SemaObjCXX/Inputs/nullability-consistency-1.h
+++ b/test/SemaObjCXX/Inputs/nullability-consistency-1.h
@@ -1,6 +1,6 @@
 void f1(int *ptr); // expected-warning{{pointer is missing a nullability type specifier}}
 
-void f2(int * __nonnull);
+void f2(int * _Nonnull);
 
 #include "nullability-consistency-2.h"
 
diff --git a/test/SemaObjCXX/Inputs/nullability-consistency-2.h b/test/SemaObjCXX/Inputs/nullability-consistency-2.h
index 8efdfa8..4517738 100644
--- a/test/SemaObjCXX/Inputs/nullability-consistency-2.h
+++ b/test/SemaObjCXX/Inputs/nullability-consistency-2.h
@@ -1,4 +1,4 @@
-void g1(int * __nonnull);
+void g1(int * _Nonnull);
 
 void g2(int (^block)(int, int)); // expected-warning{{block pointer is missing a nullability type specifier}}
 
diff --git a/test/SemaObjCXX/Inputs/nullability-consistency-3.h b/test/SemaObjCXX/Inputs/nullability-consistency-3.h
index a0c0d38..520d1a4 100644
--- a/test/SemaObjCXX/Inputs/nullability-consistency-3.h
+++ b/test/SemaObjCXX/Inputs/nullability-consistency-3.h
@@ -1 +1 @@
-void double_declarator1(int *__nonnull *); // expected-warning{{pointer is missing a nullability type specifier (__nonnull, __nullable, or __null_unspecified)}}
+void double_declarator1(int *_Nonnull *); // expected-warning{{pointer is missing a nullability type specifier (_Nonnull, _Nullable, or _Null_unspecified)}}
diff --git a/test/SemaObjCXX/Inputs/nullability-consistency-4.h b/test/SemaObjCXX/Inputs/nullability-consistency-4.h
index 984280c..ac227a0 100644
--- a/test/SemaObjCXX/Inputs/nullability-consistency-4.h
+++ b/test/SemaObjCXX/Inputs/nullability-consistency-4.h
@@ -1 +1 @@
-void double_declarator1(int * * __nonnull); // expected-warning{{pointer is missing a nullability type specifier (__nonnull, __nullable, or __null_unspecified)}}
+void double_declarator1(int * * _Nonnull); // expected-warning{{pointer is missing a nullability type specifier (_Nonnull, _Nullable, or _Null_unspecified)}}
diff --git a/test/SemaObjCXX/Inputs/nullability-consistency-5.h b/test/SemaObjCXX/Inputs/nullability-consistency-5.h
index 3a685af..1c74ab8 100644
--- a/test/SemaObjCXX/Inputs/nullability-consistency-5.h
+++ b/test/SemaObjCXX/Inputs/nullability-consistency-5.h
@@ -8,7 +8,7 @@ void suppress1(SUPPRESS_NULLABILITY_WARNING(int *) ptr); // no warning
 
 void shouldwarn5(int *ptr); //expected-warning{{missing a nullability type specifier}}
 
-void trigger5(int * __nonnull);
+void trigger5(int * _Nonnull);
 
 void suppress2(SUPPRESS_NULLABILITY_WARNING(int *) ptr); // no warning
 
diff --git a/test/SemaObjCXX/Inputs/nullability-consistency-8.h b/test/SemaObjCXX/Inputs/nullability-consistency-8.h
index 890bb4d..2425a70 100644
--- a/test/SemaObjCXX/Inputs/nullability-consistency-8.h
+++ b/test/SemaObjCXX/Inputs/nullability-consistency-8.h
@@ -1,4 +1,4 @@
-typedef int* __nonnull mynonnull;
+typedef int* _Nonnull mynonnull;
 
 __attribute__((objc_root_class))
 @interface typedefClass
@@ -13,15 +13,15 @@ void func3(int *); // expected-warning{{pointer is missing a nullability type sp
 typedef void *CFTypeRef;
 void cf1(CFTypeRef * p CF_RETURNS_NOT_RETAINED); // expected-warning {{pointer is missing a nullability type specifier}}
 
-void cf2(CFTypeRef * __nullable p CF_RETURNS_NOT_RETAINED);
-void cf3(CFTypeRef * __nonnull p CF_RETURNS_NOT_RETAINED);
+void cf2(CFTypeRef * _Nullable p CF_RETURNS_NOT_RETAINED);
+void cf3(CFTypeRef * _Nonnull p CF_RETURNS_NOT_RETAINED);
 
-void cf4(CFTypeRef __nullable * __nullable p CF_RETURNS_NOT_RETAINED);
-void cf5(CFTypeRef __nonnull * __nullable p CF_RETURNS_NOT_RETAINED);
+void cf4(CFTypeRef _Nullable * _Nullable p CF_RETURNS_NOT_RETAINED);
+void cf5(CFTypeRef _Nonnull * _Nullable p CF_RETURNS_NOT_RETAINED);
 
-void cf6(CFTypeRef * __nullable CF_RETURNS_NOT_RETAINED p);
-void cf7(CF_RETURNS_NOT_RETAINED CFTypeRef * __nonnull p);
+void cf6(CFTypeRef * _Nullable CF_RETURNS_NOT_RETAINED p);
+void cf7(CF_RETURNS_NOT_RETAINED CFTypeRef * _Nonnull p);
 
-typedef CFTypeRef __nullable *CFTypeRefPtr;
+typedef CFTypeRef _Nullable *CFTypeRefPtr;
 void cfp1(CFTypeRefPtr p CF_RETURNS_NOT_RETAINED); // expected-warning {{pointer is missing a nullability type specifier}}
-void cfp2(CFTypeRefPtr __nonnull p CF_RETURNS_NOT_RETAINED);
+void cfp2(CFTypeRefPtr _Nonnull p CF_RETURNS_NOT_RETAINED);
diff --git a/test/SemaObjCXX/Inputs/nullability-consistency-system/nullability-consistency-system.h b/test/SemaObjCXX/Inputs/nullability-consistency-system/nullability-consistency-system.h
index 6dbca16..9161af1 100644
--- a/test/SemaObjCXX/Inputs/nullability-consistency-system/nullability-consistency-system.h
+++ b/test/SemaObjCXX/Inputs/nullability-consistency-system/nullability-consistency-system.h
@@ -5,4 +5,4 @@ void system1(int *ptr);
 // expected-warning@-2{{pointer is missing a nullability type specifier}}
 #endif
 
-void system2(int * __nonnull);
+void system2(int * _Nonnull);
diff --git a/test/SemaObjCXX/Inputs/nullability-pragmas-1.h b/test/SemaObjCXX/Inputs/nullability-pragmas-1.h
index 9501116..4ac813d 100644
--- a/test/SemaObjCXX/Inputs/nullability-pragmas-1.h
+++ b/test/SemaObjCXX/Inputs/nullability-pragmas-1.h
@@ -25,19 +25,19 @@ void f3(A* obj);
 void f4(int (^block)(int, int));
 void f5(int_ptr x);
 void f6(A_ptr obj);
-void f7(int * __nullable x);
-void f8(A * __nullable obj);
+void f7(int * _Nullable x);
+void f8(A * _Nullable obj);
 void f9(int X::* mem_ptr);
 void f10(int (X::*mem_func)(int, int));
-void f11(int X::* __nullable mem_ptr);
-void f12(int (X::* __nullable mem_func)(int, int));
+void f11(int X::* _Nullable mem_ptr);
+void f12(int (X::* _Nullable mem_func)(int, int));
 
 int_ptr f13(void);
 A *f14(void);
 
-int * __null_unspecified f15(void);
-A * __null_unspecified f16(void);
-void f17(CFErrorRef *error); // expected-note{{no known conversion from 'A * __nonnull' to 'CFErrorRef  __nullable * __nullable' (aka '__CFError **') for 1st argument}}
+int * _Null_unspecified f15(void);
+A * _Null_unspecified f16(void);
+void f17(CFErrorRef *error); // expected-note{{no known conversion from 'A * _Nonnull' to 'CFErrorRef  _Nullable * _Nullable' (aka '__CFError **') for 1st argument}}
 void f18(A **); // expected-warning 2{{pointer is missing a nullability type specifier}}
 void f19(CFErrorRefPtr error); // expected-warning{{pointer is missing a nullability type specifier}}
 
@@ -64,14 +64,14 @@ void g5(int (**fp)(int, int)); // expected-warning 2{{pointer is missing a nulla
 
 int *global_int_ptr;
 
-// typedefs not inferred __nonnull
+// typedefs not inferred _Nonnull
 typedef int *int_ptr_2;
 
 typedef int * // expected-warning{{pointer is missing a nullability type specifier}}
             *int_ptr_ptr;
 
 static inline void f30(void) {
-  float *fp = global_int_ptr; // expected-error{{cannot initialize a variable of type 'float *' with an lvalue of type 'int * __nonnull'}}
+  float *fp = global_int_ptr; // expected-error{{cannot initialize a variable of type 'float *' with an lvalue of type 'int * _Nonnull'}}
 
   int_ptr_2 ip2;
   float *fp2 = ip2; // expected-error{{cannot initialize a variable of type 'float *' with an lvalue of type 'int_ptr_2' (aka 'int *')}}
@@ -83,7 +83,7 @@ static inline void f30(void) {
 @interface AA : A {
 @public
   id ivar1;
-  __nonnull id ivar2;
+  _Nonnull id ivar2;
 }
 @end
 
@@ -92,8 +92,8 @@ static inline void f30(void) {
 void f20(A *a); // expected-warning{{pointer is missing a nullability type specifier}}
 void f21(int_ptr x); // expected-warning{{pointer is missing a nullability type specifier}}
 void f22(A_ptr y); // expected-warning{{pointer is missing a nullability type specifier}}
-void f23(int_ptr __nullable x);
-void f24(A_ptr __nullable y);
+void f23(int_ptr _Nullable x);
+void f24(A_ptr _Nullable y);
 void f25(int_ptr_2 x); // expected-warning{{pointer is missing a nullability type specifier}}
 
 @interface A(OutsidePragmas1)
diff --git a/test/SemaObjCXX/nullability-consistency.mm b/test/SemaObjCXX/nullability-consistency.mm
index acb972d..6921d8b 100644
--- a/test/SemaObjCXX/nullability-consistency.mm
+++ b/test/SemaObjCXX/nullability-consistency.mm
@@ -13,4 +13,4 @@
 
 void h1(int *ptr) { } // don't warn
 
-void h2(int * __nonnull) { }
+void h2(int * _Nonnull) { }
diff --git a/test/SemaObjCXX/nullability-pragmas.mm b/test/SemaObjCXX/nullability-pragmas.mm
index 0c61a30..dbf4f37 100644
--- a/test/SemaObjCXX/nullability-pragmas.mm
+++ b/test/SemaObjCXX/nullability-pragmas.mm
@@ -7,7 +7,11 @@
 #  error assume_nonnull feature is not set
 #endif
 
-void test_pragmas_1(A * __nonnull a, AA * __nonnull aa) {
+#if !__has_extension(assume_nonnull)
+#  error assume_nonnull extension is not set
+#endif
+
+void test_pragmas_1(A * _Nonnull a, AA * _Nonnull aa) {
   f1(0); // okay: no nullability annotations
   f2(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
   f3(0); // expected-warning{{null passed to a callee that requires a non-null argument}}
@@ -23,19 +27,19 @@ void test_pragmas_1(A * __nonnull a, AA * __nonnull aa) {
   [a method1:0]; // expected-warning{{null passed to a callee that requires a non-null argument}}
 
   f17(a); // expected-error{{no matching function for call to 'f17'}}
-  [a method3: a]; // expected-error{{cannot initialize a parameter of type 'NSError * __nullable * __nullable' with an lvalue of type 'A * __nonnull'}}
-  [a method4: a]; // expected-error{{cannot initialize a parameter of type 'NSErrorPtr  __nullable * __nullable' (aka 'NSError **') with an lvalue of type 'A * __nonnull'}}
+  [a method3: a]; // expected-error{{cannot initialize a parameter of type 'NSError * _Nullable * _Nullable' with an lvalue of type 'A * _Nonnull'}}
+  [a method4: a]; // expected-error{{cannot initialize a parameter of type 'NSErrorPtr  _Nullable * _Nullable' (aka 'NSError **') with an lvalue of type 'A * _Nonnull'}}
 
   float *ptr;
-  ptr = f13(); // expected-error{{assigning to 'float *' from incompatible type 'int_ptr __nonnull' (aka 'int *')}}
-  ptr = f14(); // expected-error{{assigning to 'float *' from incompatible type 'A * __nonnull'}}
-  ptr = [a method1:a]; // expected-error{{assigning to 'float *' from incompatible type 'A * __nonnull'}}
-  ptr = a.aProp; // expected-error{{assigning to 'float *' from incompatible type 'A * __nonnull'}}
-  ptr = global_int_ptr; // expected-error{{assigning to 'float *' from incompatible type 'int * __nonnull'}}
-  ptr = f15(); // expected-error{{assigning to 'float *' from incompatible type 'int * __null_unspecified'}}
-  ptr = f16(); // expected-error{{assigning to 'float *' from incompatible type 'A * __null_unspecified'}}
-  ptr = [a method2]; // expected-error{{assigning to 'float *' from incompatible type 'A * __null_unspecified'}}
+  ptr = f13(); // expected-error{{assigning to 'float *' from incompatible type 'int_ptr _Nonnull' (aka 'int *')}}
+  ptr = f14(); // expected-error{{assigning to 'float *' from incompatible type 'A * _Nonnull'}}
+  ptr = [a method1:a]; // expected-error{{assigning to 'float *' from incompatible type 'A * _Nonnull'}}
+  ptr = a.aProp; // expected-error{{assigning to 'float *' from incompatible type 'A * _Nonnull'}}
+  ptr = global_int_ptr; // expected-error{{assigning to 'float *' from incompatible type 'int * _Nonnull'}}
+  ptr = f15(); // expected-error{{assigning to 'float *' from incompatible type 'int * _Null_unspecified'}}
+  ptr = f16(); // expected-error{{assigning to 'float *' from incompatible type 'A * _Null_unspecified'}}
+  ptr = [a method2]; // expected-error{{assigning to 'float *' from incompatible type 'A * _Null_unspecified'}}
 
   ptr = aa->ivar1; // expected-error{{from incompatible type 'id'}}
-  ptr = aa->ivar2; // expected-error{{from incompatible type 'id __nonnull'}}
+  ptr = aa->ivar2; // expected-error{{from incompatible type 'id _Nonnull'}}
 }
diff --git a/test/SemaTemplate/instantiate-explicitly-after-fatal.cpp b/test/SemaTemplate/instantiate-explicitly-after-fatal.cpp
new file mode 100644
index 0000000..9693d2f
--- /dev/null
+++ b/test/SemaTemplate/instantiate-explicitly-after-fatal.cpp
@@ -0,0 +1,9 @@
+// RUN: not %clang_cc1 -fsyntax-only -std=c++11 -ferror-limit 1 %s 2>&1 | FileCheck %s
+unknown_type foo(unknown_type);
+// CHECK: fatal error: too many errors emitted, stopping now
+
+template <typename>
+class Bar {};
+
+extern template class Bar<int>;
+template class Bar<int>;
diff --git a/test/SemaTemplate/instantiate-local-class.cpp b/test/SemaTemplate/instantiate-local-class.cpp
index f58d7a4..c0ea6a0 100644
--- a/test/SemaTemplate/instantiate-local-class.cpp
+++ b/test/SemaTemplate/instantiate-local-class.cpp
@@ -394,3 +394,57 @@ void f() {
 
 void g() { f<void>(); }
 }
+
+
+namespace PR21332 {
+  template<typename T> void f1() {
+    struct S {  // expected-note{{in instantiation of member class 'S' requested here}}
+      void g1(int n = T::error);  // expected-error{{type 'int' cannot be used prior to '::' because it has no members}}
+    };
+  }
+  template void f1<int>();  // expected-note{{in instantiation of function template specialization 'PR21332::f1<int>' requested here}}
+
+  template<typename T> void f2() {
+    struct S {  // expected-note{{in instantiation of member class 'S' requested here}}
+      void g2() noexcept(T::error);  // expected-error{{type 'int' cannot be used prior to '::' because it has no members}}
+    };
+  }
+  template void f2<int>();  // expected-note{{in instantiation of function template specialization 'PR21332::f2<int>' requested here}}
+
+  template<typename T> void f3() {
+    enum S {
+      val = T::error;  // expected-error{{expected '}' or ','}} expected-error{{type 'int' cannot be used prior to '::' because it has no members}}
+    };
+  }
+  template void f3<int>();  //expected-note{{in instantiation of function template specialization 'PR21332::f3<int>' requested here}}
+
+  template<typename T> void f4() {
+    enum class S {
+      val = T::error;  // expected-error{{expected '}' or ','}} expected-error{{type 'int' cannot be used prior to '::' because it has no members}}
+    };
+  }
+  template void f4<int>();  // expected-note{{in instantiation of function template specialization 'PR21332::f4<int>' requested here}}
+
+  template<typename T> void f5() {
+    class S {  // expected-note {{in instantiation of default member initializer 'PR21332::f5()::S::val' requested here}}
+      int val = T::error;  // expected-error {{type 'int' cannot be used prior to '::' because it has no members}}
+     };
+  }
+  template void f5<int>();  // expected-note {{in instantiation of function template specialization 'PR21332::f5<int>' requested here}}
+
+  template<typename T> void f6() {
+    class S {  // expected-note {{in instantiation of member function 'PR21332::f6()::S::get' requested here}}
+      void get() {
+        class S2 {  // expected-note {{in instantiation of member class 'S2' requested here}}
+          void g1(int n = T::error);  // expected-error {{type 'int' cannot be used prior to '::' because it has no members}}
+        };
+      }
+    };
+  }
+  template void f6<int>();  // expected-note{{in instantiation of function template specialization 'PR21332::f6<int>' requested here}}
+
+  template<typename T> void f7() {
+    struct S { void g() noexcept(undefined_val); };  // expected-error{{use of undeclared identifier 'undefined_val'}}
+  }
+  template void f7<int>();
+}
diff --git a/test/lit.cfg b/test/lit.cfg
index 51e1e4f..0e947dd 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -452,14 +452,13 @@ if lit.util.which('xmllint'):
     config.available_features.add('xmllint')
 
 # Sanitizers.
-if config.llvm_use_sanitizer == "Address":
+if 'Address' in config.llvm_use_sanitizer:
     config.available_features.add("asan")
 else:
     config.available_features.add("not_asan")
-if (config.llvm_use_sanitizer == "Memory" or
-        config.llvm_use_sanitizer == "MemoryWithOrigins"):
+if 'Memory' in config.llvm_use_sanitizer:
     config.available_features.add("msan")
-if config.llvm_use_sanitizer == "Undefined":
+if 'Undefined' in config.llvm_use_sanitizer:
     config.available_features.add("ubsan")
 else:
     config.available_features.add("not_ubsan")
diff --git a/tools/clang-fuzzer/ClangFuzzer.cpp b/tools/clang-fuzzer/ClangFuzzer.cpp
index 17ef052..124911c 100644
--- a/tools/clang-fuzzer/ClangFuzzer.cpp
+++ b/tools/clang-fuzzer/ClangFuzzer.cpp
@@ -39,5 +39,8 @@ extern "C" void LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
   Invocation->getPreprocessorOpts().addRemappedFile("./test.cc", Input.release());
   std::unique_ptr<tooling::ToolAction> action(
       tooling::newFrontendActionFactory<clang::SyntaxOnlyAction>());
-  action->runInvocation(Invocation.release(), Files.get(), &Diags);
+  std::shared_ptr<PCHContainerOperations> PCHContainerOps =
+      std::make_shared<RawPCHContainerOperations>();
+  action->runInvocation(Invocation.release(), Files.get(), PCHContainerOps,
+                        &Diags);
 }
diff --git a/tools/driver/cc1as_main.cpp b/tools/driver/cc1as_main.cpp
index f7ac17f..aa92541 100644
--- a/tools/driver/cc1as_main.cpp
+++ b/tools/driver/cc1as_main.cpp
@@ -161,68 +161,68 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
 
   const unsigned IncludedFlagsBitmask = options::CC1AsOption;
   unsigned MissingArgIndex, MissingArgCount;
-  std::unique_ptr<InputArgList> Args(
-      OptTbl->ParseArgs(Argv.begin(), Argv.end(), MissingArgIndex, MissingArgCount,
-                        IncludedFlagsBitmask));
+  InputArgList Args = OptTbl->ParseArgs(Argv, MissingArgIndex, MissingArgCount,
+                                        IncludedFlagsBitmask);
 
   // Check for missing argument error.
   if (MissingArgCount) {
     Diags.Report(diag::err_drv_missing_argument)
-        << Args->getArgString(MissingArgIndex) << MissingArgCount;
+        << Args.getArgString(MissingArgIndex) << MissingArgCount;
     Success = false;
   }
 
   // Issue errors on unknown arguments.
-  for (const Arg *A : Args->filtered(OPT_UNKNOWN)) {
-    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(*Args);
+  for (const Arg *A : Args.filtered(OPT_UNKNOWN)) {
+    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args);
     Success = false;
   }
 
   // Construct the invocation.
 
   // Target Options
-  Opts.Triple = llvm::Triple::normalize(Args->getLastArgValue(OPT_triple));
-  Opts.CPU = Args->getLastArgValue(OPT_target_cpu);
-  Opts.Features = Args->getAllArgValues(OPT_target_feature);
+  Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple));
+  Opts.CPU = Args.getLastArgValue(OPT_target_cpu);
+  Opts.Features = Args.getAllArgValues(OPT_target_feature);
 
   // Use the default target triple if unspecified.
   if (Opts.Triple.empty())
     Opts.Triple = llvm::sys::getDefaultTargetTriple();
 
   // Language Options
-  Opts.IncludePaths = Args->getAllArgValues(OPT_I);
-  Opts.NoInitialTextSection = Args->hasArg(OPT_n);
-  Opts.SaveTemporaryLabels = Args->hasArg(OPT_msave_temp_labels);
-  Opts.GenDwarfForAssembly = Args->hasArg(OPT_g_Flag);
-  Opts.CompressDebugSections = Args->hasArg(OPT_compress_debug_sections);
-  if (Args->hasArg(OPT_gdwarf_2))
+  Opts.IncludePaths = Args.getAllArgValues(OPT_I);
+  Opts.NoInitialTextSection = Args.hasArg(OPT_n);
+  Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels);
+  Opts.GenDwarfForAssembly = Args.hasArg(OPT_g_Flag);
+  Opts.CompressDebugSections = Args.hasArg(OPT_compress_debug_sections);
+  if (Args.hasArg(OPT_gdwarf_2))
     Opts.DwarfVersion = 2;
-  if (Args->hasArg(OPT_gdwarf_3))
+  if (Args.hasArg(OPT_gdwarf_3))
     Opts.DwarfVersion = 3;
-  if (Args->hasArg(OPT_gdwarf_4))
+  if (Args.hasArg(OPT_gdwarf_4))
     Opts.DwarfVersion = 4;
-  Opts.DwarfDebugFlags = Args->getLastArgValue(OPT_dwarf_debug_flags);
-  Opts.DwarfDebugProducer = Args->getLastArgValue(OPT_dwarf_debug_producer);
-  Opts.DebugCompilationDir = Args->getLastArgValue(OPT_fdebug_compilation_dir);
-  Opts.MainFileName = Args->getLastArgValue(OPT_main_file_name);
+  Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags);
+  Opts.DwarfDebugProducer = Args.getLastArgValue(OPT_dwarf_debug_producer);
+  Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir);
+  Opts.MainFileName = Args.getLastArgValue(OPT_main_file_name);
 
   // Frontend Options
-  if (Args->hasArg(OPT_INPUT)) {
+  if (Args.hasArg(OPT_INPUT)) {
     bool First = true;
-    for (arg_iterator it = Args->filtered_begin(OPT_INPUT),
-           ie = Args->filtered_end(); it != ie; ++it, First=false) {
+    for (arg_iterator it = Args.filtered_begin(OPT_INPUT),
+                      ie = Args.filtered_end();
+         it != ie; ++it, First = false) {
       const Arg *A = it;
       if (First)
         Opts.InputFile = A->getValue();
       else {
-        Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(*Args);
+        Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args);
         Success = false;
       }
     }
   }
-  Opts.LLVMArgs = Args->getAllArgValues(OPT_mllvm);
-  Opts.OutputPath = Args->getLastArgValue(OPT_o);
-  if (Arg *A = Args->getLastArg(OPT_filetype)) {
+  Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm);
+  Opts.OutputPath = Args.getLastArgValue(OPT_o);
+  if (Arg *A = Args.getLastArg(OPT_filetype)) {
     StringRef Name = A->getValue();
     unsigned OutputType = StringSwitch<unsigned>(Name)
       .Case("asm", FT_Asm)
@@ -230,25 +230,24 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
       .Case("obj", FT_Obj)
       .Default(~0U);
     if (OutputType == ~0U) {
-      Diags.Report(diag::err_drv_invalid_value)
-        << A->getAsString(*Args) << Name;
+      Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
       Success = false;
     } else
       Opts.OutputType = FileType(OutputType);
   }
-  Opts.ShowHelp = Args->hasArg(OPT_help);
-  Opts.ShowVersion = Args->hasArg(OPT_version);
+  Opts.ShowHelp = Args.hasArg(OPT_help);
+  Opts.ShowVersion = Args.hasArg(OPT_version);
 
   // Transliterate Options
   Opts.OutputAsmVariant =
-      getLastArgIntValue(*Args.get(), OPT_output_asm_variant, 0, Diags);
-  Opts.ShowEncoding = Args->hasArg(OPT_show_encoding);
-  Opts.ShowInst = Args->hasArg(OPT_show_inst);
+      getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags);
+  Opts.ShowEncoding = Args.hasArg(OPT_show_encoding);
+  Opts.ShowInst = Args.hasArg(OPT_show_inst);
 
   // Assemble Options
-  Opts.RelaxAll = Args->hasArg(OPT_mrelax_all);
-  Opts.NoExecStack = Args->hasArg(OPT_mno_exec_stack);
-  Opts.FatalWarnings =  Args->hasArg(OPT_massembler_fatal_warnings);
+  Opts.RelaxAll = Args.hasArg(OPT_mrelax_all);
+  Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack);
+  Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings);
 
   return Success;
 }
diff --git a/tools/driver/driver.cpp b/tools/driver/driver.cpp
index ff81b8a..5925447 100644
--- a/tools/driver/driver.cpp
+++ b/tools/driver/driver.cpp
@@ -321,16 +321,16 @@ static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient,
 // This lets us create the DiagnosticsEngine with a properly-filled-out
 // DiagnosticOptions instance.
 static DiagnosticOptions *
-CreateAndPopulateDiagOpts(SmallVectorImpl<const char *> &argv) {
+CreateAndPopulateDiagOpts(ArrayRef<const char *> argv) {
   auto *DiagOpts = new DiagnosticOptions;
   std::unique_ptr<OptTable> Opts(createDriverOptTable());
   unsigned MissingArgIndex, MissingArgCount;
-  std::unique_ptr<InputArgList> Args(Opts->ParseArgs(
-      argv.begin() + 1, argv.end(), MissingArgIndex, MissingArgCount));
+  InputArgList Args =
+      Opts->ParseArgs(argv.slice(1), MissingArgIndex, MissingArgCount);
   // We ignore MissingArgCount and the return value of ParseDiagnosticArgs.
   // Any errors that would be diagnosed here will also be diagnosed later,
   // when the DiagnosticsEngine actually exists.
-  (void) ParseDiagnosticArgs(*DiagOpts, *Args);
+  (void)ParseDiagnosticArgs(*DiagOpts, Args);
   return DiagOpts;
 }
 
diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp
index 2216ec6..bf5b582 100644
--- a/tools/libclang/CIndex.cpp
+++ b/tools/libclang/CIndex.cpp
@@ -1880,6 +1880,9 @@ public:
   void VisitOMPBarrierDirective(const OMPBarrierDirective *D);
   void VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *D);
   void VisitOMPTaskgroupDirective(const OMPTaskgroupDirective *D);
+  void
+  VisitOMPCancellationPointDirective(const OMPCancellationPointDirective *D);
+  void VisitOMPCancelDirective(const OMPCancelDirective *D);
   void VisitOMPFlushDirective(const OMPFlushDirective *D);
   void VisitOMPOrderedDirective(const OMPOrderedDirective *D);
   void VisitOMPAtomicDirective(const OMPAtomicDirective *D);
@@ -1934,8 +1937,8 @@ void EnqueueVisitor::AddTypeLoc(TypeSourceInfo *TI) {
  }
 void EnqueueVisitor::EnqueueChildren(const Stmt *S) {
   unsigned size = WL.size();
-  for (Stmt::const_child_range Child = S->children(); Child; ++Child) {
-    AddStmt(*Child);
+  for (const Stmt *SubStmt : S->children()) {
+    AddStmt(SubStmt);
   }
   if (size == WL.size())
     return;
@@ -2098,6 +2101,9 @@ OMPClauseEnqueue::VisitOMPCopyprivateClause(const OMPCopyprivateClause *C) {
 void OMPClauseEnqueue::VisitOMPFlushClause(const OMPFlushClause *C) {
   VisitOMPClauseList(C);
 }
+void OMPClauseEnqueue::VisitOMPDependClause(const OMPDependClause *C) {
+  VisitOMPClauseList(C);
+}
 }
 
 void EnqueueVisitor::EnqueueChildren(const OMPClause *S) {
@@ -2504,6 +2510,15 @@ void EnqueueVisitor::VisitOMPTeamsDirective(const OMPTeamsDirective *D) {
   VisitOMPExecutableDirective(D);
 }
 
+void EnqueueVisitor::VisitOMPCancellationPointDirective(
+    const OMPCancellationPointDirective *D) {
+  VisitOMPExecutableDirective(D);
+}
+
+void EnqueueVisitor::VisitOMPCancelDirective(const OMPCancelDirective *D) {
+  VisitOMPExecutableDirective(D);
+}
+
 void CursorVisitor::EnqueueWorkList(VisitorWorkList &WL, const Stmt *S) {
   EnqueueVisitor(WL, MakeCXCursor(S, StmtParent, TU,RegionOfInterest)).Visit(S);
 }
@@ -3827,12 +3842,11 @@ CXString clang_Cursor_getMangling(CXCursor C) {
   // Now apply backend mangling.
   std::unique_ptr<llvm::DataLayout> DL(
       new llvm::DataLayout(Ctx.getTargetInfo().getTargetDescription()));
-  llvm::Mangler BackendMangler(DL.get());
 
   std::string FinalBuf;
   llvm::raw_string_ostream FinalBufOS(FinalBuf);
-  BackendMangler.getNameWithPrefix(FinalBufOS,
-                                   llvm::Twine(FrontendBufOS.str()));
+  llvm::Mangler::getNameWithPrefix(FinalBufOS, llvm::Twine(FrontendBufOS.str()),
+                                   *DL);
 
   return cxstring::createDup(FinalBufOS.str());
 }
@@ -4281,6 +4295,10 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
     return cxstring::createRef("OMPTargetDirective");
   case CXCursor_OMPTeamsDirective:
     return cxstring::createRef("OMPTeamsDirective");
+  case CXCursor_OMPCancellationPointDirective:
+    return cxstring::createRef("OMPCancellationPointDirective");
+  case CXCursor_OMPCancelDirective:
+    return cxstring::createRef("OMPCancelDirective");
   case CXCursor_OverloadCandidate:
       return cxstring::createRef("OverloadCandidate");
   }
diff --git a/tools/libclang/CXCursor.cpp b/tools/libclang/CXCursor.cpp
index b8bb28e..fe9ba4e 100644
--- a/tools/libclang/CXCursor.cpp
+++ b/tools/libclang/CXCursor.cpp
@@ -588,6 +588,12 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::OMPTeamsDirectiveClass:
     K = CXCursor_OMPTeamsDirective;
     break;
+  case Stmt::OMPCancellationPointDirectiveClass:
+    K = CXCursor_OMPCancellationPointDirective;
+    break;
+  case Stmt::OMPCancelDirectiveClass:
+    K = CXCursor_OMPCancelDirective;
+    break;
   }
 
   CXCursor C = { K, 0, { Parent, S, TU } };
diff --git a/tools/scan-build/ccc-analyzer b/tools/scan-build/ccc-analyzer
index ffe6859..14591ae 100755
--- a/tools/scan-build/ccc-analyzer
+++ b/tools/scan-build/ccc-analyzer
@@ -22,6 +22,33 @@ use File::Basename;
 use Text::ParseWords;
 
 ##===----------------------------------------------------------------------===##
+# List form 'system' with STDOUT and STDERR captured.
+##===----------------------------------------------------------------------===##
+
+sub silent_system {
+  my $HtmlDir = shift;
+  my $Command = shift;
+
+  # Save STDOUT and STDERR and redirect to a temporary file.
+  open OLDOUT, ">&", \*STDOUT;
+  open OLDERR, ">&", \*STDERR;
+  my ($TmpFH, $TmpFile) = tempfile("temp_buf_XXXXXX",
+                                   DIR => $HtmlDir,
+                                   UNLINK => 1);
+  open(STDOUT, ">$TmpFile");
+  open(STDERR, ">&", \*STDOUT);
+
+  # Invoke 'system', STDOUT and STDERR are output to a temporary file.
+  system $Command, @_;
+
+  # Restore STDOUT and STDERR.
+  open STDOUT, ">&", \*OLDOUT;
+  open STDERR, ">&", \*OLDERR;
+
+  return $TmpFH;
+}
+
+##===----------------------------------------------------------------------===##
 # Compiler command setup.
 ##===----------------------------------------------------------------------===##
 
@@ -145,7 +172,7 @@ sub ProcessClangFailure {
   print OUT "@$Args\n";
   close OUT;
   `uname -a >> $PPFile.info.txt 2>&1`;
-  `$Compiler -v >> $PPFile.info.txt 2>&1`;
+  `"$Compiler" -v >> $PPFile.info.txt 2>&1`;
   rename($ofile, "$PPFile.stderr.txt");
   return (basename $PPFile);
 }
@@ -155,27 +182,15 @@ sub ProcessClangFailure {
 ##----------------------------------------------------------------------------##
 
 sub GetCCArgs {
+  my $HtmlDir = shift;
   my $mode = shift;
   my $Args = shift;
-
-  pipe (FROM_CHILD, TO_PARENT);
-  my $pid = fork();
-  if ($pid == 0) {
-    close FROM_CHILD;
-    open(STDOUT,">&", \*TO_PARENT);
-    open(STDERR,">&", \*TO_PARENT);
-    exec $Clang, "-###", $mode, @$Args;
-  }
-  close(TO_PARENT);
   my $line;
-  while (<FROM_CHILD>) {
+  my $OutputStream = silent_system($HtmlDir, $Clang, "-###", $mode, @$Args);
+  while (<$OutputStream>) {
     next if (!/\s"?-cc1"?\s/);
     $line = $_;
   }
-
-  waitpid($pid,0);
-  close(FROM_CHILD);
-
   die "could not find clang line\n" if (!defined $line);
   # Strip leading and trailing whitespace characters.
   $line =~ s/^\s+|\s+$//g;
@@ -207,7 +222,7 @@ sub Analyze {
     $Cmd = $Clang;
 
     # Create arguments for doing regular parsing.
-    my $SyntaxArgs = GetCCArgs("-fsyntax-only", \@Args);
+    my $SyntaxArgs = GetCCArgs($HtmlDir, "-fsyntax-only", \@Args);
     @CmdArgsSansAnalyses = @$SyntaxArgs;
 
     # Create arguments for doing static analysis.
@@ -230,7 +245,7 @@ sub Analyze {
       push @Args, "-Xclang", "-analyzer-viz-egraph-ubigraph";
     }
 
-    my $AnalysisArgs = GetCCArgs("--analyze", \@Args);
+    my $AnalysisArgs = GetCCArgs($HtmlDir, "--analyze", \@Args);
     @CmdArgs = @$AnalysisArgs;
   }
 
@@ -255,31 +270,19 @@ sub Analyze {
     print STDERR "#SHELL (cd '$dir' && @PrintArgs)\n";
   }
 
-  # Capture the STDERR of clang and send it to a temporary file.
-  # Capture the STDOUT of clang and reroute it to ccc-analyzer's STDERR.
+  # Save STDOUT and STDERR of clang to a temporary file and reroute
+  # all clang output to ccc-analyzer's STDERR.
   # We save the output file in the 'crashes' directory if clang encounters
   # any problems with the file.
-  pipe (FROM_CHILD, TO_PARENT);
-  my $pid = fork();
-  if ($pid == 0) {
-    close FROM_CHILD;
-    open(STDOUT,">&", \*TO_PARENT);
-    open(STDERR,">&", \*TO_PARENT);
-    exec $Cmd, @CmdArgs;
-  }
-
-  close TO_PARENT;
   my ($ofh, $ofile) = tempfile("clang_output_XXXXXX", DIR => $HtmlDir);
-
-  while (<FROM_CHILD>) {
+  
+  my $OutputStream = silent_system($HtmlDir, $Cmd, @CmdArgs);
+  while ( <$OutputStream> ) {
     print $ofh $_;
     print STDERR $_;
   }
-  close $ofh;
-
-  waitpid($pid,0);
-  close(FROM_CHILD);
   my $Result = $?;
+  close $ofh;
 
   # Did the command die because of a signal?
   if ($ReportFailures) {
diff --git a/tools/scan-build/scan-build b/tools/scan-build/scan-build
index ac8e22e..d7cadc3 100755
--- a/tools/scan-build/scan-build
+++ b/tools/scan-build/scan-build
@@ -1232,16 +1232,9 @@ ENDTEXT
   }
   my %EnabledCheckers;
   foreach my $lang ("c", "objective-c", "objective-c++", "c++") {
-    pipe(FROM_CHILD, TO_PARENT);
-    my $pid = fork();
-    if ($pid == 0) {
-      close FROM_CHILD;
-      open(STDOUT,">&", \*TO_PARENT);
-      open(STDERR,">&", \*TO_PARENT);
-      exec $Clang, ( @PluginLoadCommandline_xclang, '--analyze', '-x', $lang, '-', '-###');
-    }
-    close(TO_PARENT);
-    while(<FROM_CHILD>) {
+    my $ExecLine = join(' ', qq/"$Clang"/, @PluginLoadCommandline_xclang, "--analyze", "-x", $lang, "-", "-###", "2>&1", "|");
+    open(PS, $ExecLine);
+    while (<PS>) {
       foreach my $val (split /\s+/) {
         $val =~ s/\"//g;
         if ($val =~ /-analyzer-checker\=([^\s]+)/) {
@@ -1249,23 +1242,14 @@ ENDTEXT
         }
       }
     }
-    waitpid($pid,0);
-    close(FROM_CHILD);
   }
 
   # Query clang for complete list of checkers.
   if (defined $Clang && -x $Clang) {
-    pipe(FROM_CHILD, TO_PARENT);
-    my $pid = fork();
-    if ($pid == 0) {
-      close FROM_CHILD;
-      open(STDOUT,">&", \*TO_PARENT);
-      open(STDERR,">&", \*TO_PARENT);
-      exec $Clang, ('-cc1', @PluginsToLoad , '-analyzer-checker-help');
-    }
-    close(TO_PARENT);
+    my $ExecLine = join(' ', qq/"$Clang"/, "-cc1", @PluginsToLoad, "-analyzer-checker-help", "2>&1", "|");
+    open(PS, $ExecLine);
     my $foundCheckers = 0;
-    while(<FROM_CHILD>) {
+    while (<PS>) {
       if (/CHECKERS:/) {
         $foundCheckers = 1;
         last;
@@ -1277,7 +1261,7 @@ ENDTEXT
     else {
       print("\nAVAILABLE CHECKERS:\n\n");
       my $skip = 0;
-      while(<FROM_CHILD>) {
+       while(<PS>) {
         if (/experimental/) {
           $skip = 1;
           next;
@@ -1314,10 +1298,9 @@ ENDTEXT
         }
         print $_;
       }
-      print "\nNOTE: \"+\" indicates that an analysis is enabled by default.\n"
+      print "\nNOTE: \"+\" indicates that an analysis is enabled by default.\n";
     }
-    waitpid($pid,0);
-    close(FROM_CHILD);
+    close PS;
   }
 
 print <<ENDTEXT
diff --git a/unittests/ASTMatchers/ASTMatchersTest.cpp b/unittests/ASTMatchers/ASTMatchersTest.cpp
index ae363e9..8ef3f15 100644
--- a/unittests/ASTMatchers/ASTMatchersTest.cpp
+++ b/unittests/ASTMatchers/ASTMatchersTest.cpp
@@ -482,6 +482,10 @@ TEST(DeclarationMatcher, MatchAnyOf) {
   EXPECT_TRUE(matches("int F() { return 1 + 2; }", MixedTypes));
   EXPECT_TRUE(matches("int F() { if (true) return 1; }", MixedTypes));
   EXPECT_TRUE(notMatches("int F() { return 1; }", MixedTypes));
+
+  EXPECT_TRUE(
+      matches("void f() try { } catch (int) { } catch (...) { }",
+              catchStmt(anyOf(hasDescendant(varDecl()), isCatchAll()))));
 }
 
 TEST(DeclarationMatcher, MatchHas) {
@@ -3321,6 +3325,10 @@ TEST(ExceptionHandling, SimpleCases) {
                       throwExpr()));
   EXPECT_TRUE(matches("void foo() try { throw 5;} catch(int X) { }",
                       throwExpr()));
+  EXPECT_TRUE(matches("void foo() try { throw; } catch(...) { }",
+                      catchStmt(isCatchAll())));
+  EXPECT_TRUE(notMatches("void foo() try { throw; } catch(int) { }",
+                         catchStmt(isCatchAll())));
 }
 
 TEST(HasConditionVariableStatement, DoesNotMatchCondition) {
diff --git a/unittests/CodeGen/BufferSourceTest.cpp b/unittests/CodeGen/BufferSourceTest.cpp
index b2a8ba58..e80e83b 100644
--- a/unittests/CodeGen/BufferSourceTest.cpp
+++ b/unittests/CodeGen/BufferSourceTest.cpp
@@ -62,6 +62,8 @@ TEST(BufferSourceTest, EmitCXXGlobalInitFunc) {
         CreateLLVMCodeGen(
             compiler.getDiagnostics(),
             "EmitCXXGlobalInitFuncTest",
+            compiler.getHeaderSearchOpts(),
+            compiler.getPreprocessorOpts(),
             compiler.getCodeGenOpts(),
             llvm::getGlobalContext())));
 
diff --git a/unittests/Format/FormatTest.cpp b/unittests/Format/FormatTest.cpp
index ed2658b..0d85789 100644
--- a/unittests/Format/FormatTest.cpp
+++ b/unittests/Format/FormatTest.cpp
@@ -2391,6 +2391,27 @@ TEST_F(FormatTest, FormatObjCTryCatch) {
                "});\n");
 }
 
+TEST_F(FormatTest, FormatObjCAutoreleasepool) {
+  FormatStyle Style = getLLVMStyle();
+  verifyFormat("@autoreleasepool {\n"
+               "  f();\n"
+               "}\n"
+               "@autoreleasepool {\n"
+               "  f();\n"
+               "}\n",
+               Style);
+  Style.BreakBeforeBraces = FormatStyle::BS_Allman;
+  verifyFormat("@autoreleasepool\n"
+               "{\n"
+               "  f();\n"
+               "}\n"
+               "@autoreleasepool\n"
+               "{\n"
+               "  f();\n"
+               "}\n",
+               Style);
+}
+
 TEST_F(FormatTest, StaticInitializers) {
   verifyFormat("static SomeClass SC = {1, 'a'};");
 
@@ -3163,6 +3184,8 @@ TEST_F(FormatTest, LayoutNestedBlocks) {
                "}, a);",
                Style);
 
+  verifyFormat("SomeFunction({MACRO({ return output; }), b});");
+
   verifyNoCrash("^{v^{a}}");
 }
 
@@ -3900,10 +3923,12 @@ TEST_F(FormatTest, BreaksDesireably) {
                "                      aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n"
                "                  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);");
 
-  // Indent consistently independent of call expression.
+  // Indent consistently independent of call expression and unary operator.
+  verifyFormat("aaaaaaaaaaa(bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(\n"
+               "    dddddddddddddddddddddddddddddd));");
+  verifyFormat("aaaaaaaaaaa(!bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(\n"
+               "    dddddddddddddddddddddddddddddd));");
   verifyFormat("aaaaaaaaaaa(bbbbbbbbbbbbbbbbbbbbbbbbb.ccccccccccccccccc(\n"
-               "    dddddddddddddddddddddddddddddd));\n"
-               "aaaaaaaaaaa(bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(\n"
                "    dddddddddddddddddddddddddddddd));");
 
   // This test case breaks on an incorrect memoization, i.e. an optimization not
@@ -4595,30 +4620,50 @@ TEST_F(FormatTest, AlignsStringLiterals) {
                "              \"c\";");
 }
 
-TEST_F(FormatTest, AlwaysBreakAfterDefinitionReturnType) {
-  FormatStyle AfterType = getLLVMStyle();
-  AfterType.AlwaysBreakAfterDefinitionReturnType = true;
+TEST_F(FormatTest, DefinitionReturnTypeBreakingStyle) {
+  FormatStyle Style = getLLVMStyle();
+  Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel;
+  verifyFormat("class C {\n"
+               "  int f() { return 1; }\n"
+               "};\n"
+               "int\n"
+               "f() {\n"
+               "  return 1;\n"
+               "}",
+               Style);
+  Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
+  verifyFormat("class C {\n"
+               "  int\n"
+               "  f() {\n"
+               "    return 1;\n"
+               "  }\n"
+               "};\n"
+               "int\n"
+               "f() {\n"
+               "  return 1;\n"
+               "}",
+               Style);
   verifyFormat("const char *\n"
                "f(void) {\n" // Break here.
                "  return \"\";\n"
                "}\n"
                "const char *bar(void);\n", // No break here.
-               AfterType);
+               Style);
   verifyFormat("template <class T>\n"
                "T *\n"
                "f(T &c) {\n" // Break here.
                "  return NULL;\n"
                "}\n"
                "template <class T> T *f(T &c);\n", // No break here.
-               AfterType);
-  AfterType.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
+               Style);
+  Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
   verifyFormat("const char *\n"
                "f(void)\n" // Break here.
                "{\n"
                "  return \"\";\n"
                "}\n"
                "const char *bar(void);\n", // No break here.
-               AfterType);
+               Style);
   verifyFormat("template <class T>\n"
                "T *\n"     // Problem here: no line break
                "f(T &c)\n" // Break here.
@@ -4626,7 +4671,7 @@ TEST_F(FormatTest, AlwaysBreakAfterDefinitionReturnType) {
                "  return NULL;\n"
                "}\n"
                "template <class T> T *f(T &c);\n", // No break here.
-               AfterType);
+               Style);
 }
 
 TEST_F(FormatTest, AlwaysBreakBeforeMultilineStrings) {
@@ -5169,7 +5214,7 @@ TEST_F(FormatTest, DoesNotIndentRelativeToUnaryOperators) {
                "        aaaaa)) {\n"
                "}");
   verifyFormat("aaaaaaaaaa(!aaaaaaaaaa( // break\n"
-               "               aaaaa));");
+               "    aaaaa));");
   verifyFormat("*aaa = aaaaaaa( // break\n"
                "    bbbbbb);");
 }
@@ -5213,11 +5258,6 @@ TEST_F(FormatTest, UnderstandsOverloadedOperators) {
   verifyGoogleFormat("operator ::A();");
 
   verifyFormat("using A::operator+;");
-
-  verifyFormat("string // break\n"
-               "operator()() & {}");
-  verifyFormat("string // break\n"
-               "operator()() && {}");
 }
 
 TEST_F(FormatTest, UnderstandsFunctionRefQualification) {
@@ -5481,6 +5521,14 @@ TEST_F(FormatTest, UnderstandsUsesOfStarAndAmp) {
   verifyFormat("A = new SomeType *[Length]();", PointerMiddle);
   verifyFormat("A = new SomeType *[Length];", PointerMiddle);
   verifyFormat("T ** t = new T *;", PointerMiddle);
+
+  // Member function reference qualifiers aren't binary operators.
+  verifyFormat("string // break\n"
+               "operator()() & {}");
+  verifyFormat("string // break\n"
+               "operator()() && {}");
+  verifyGoogleFormat("template <typename T>\n"
+                     "auto x() & -> int {}");
 }
 
 TEST_F(FormatTest, UnderstandsAttributes) {
@@ -9018,7 +9066,6 @@ TEST_F(FormatTest, ParsesConfigurationBools) {
   CHECK_PARSE_BOOL(AllowShortCaseLabelsOnASingleLine);
   CHECK_PARSE_BOOL(AllowShortIfStatementsOnASingleLine);
   CHECK_PARSE_BOOL(AllowShortLoopsOnASingleLine);
-  CHECK_PARSE_BOOL(AlwaysBreakAfterDefinitionReturnType);
   CHECK_PARSE_BOOL(AlwaysBreakTemplateDeclarations);
   CHECK_PARSE_BOOL(BinPackParameters);
   CHECK_PARSE_BOOL(BinPackArguments);
@@ -9151,6 +9198,15 @@ TEST_F(FormatTest, ParsesConfiguration) {
               FormatStyle::BS_Allman);
   CHECK_PARSE("BreakBeforeBraces: GNU", BreakBeforeBraces, FormatStyle::BS_GNU);
 
+  Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
+  CHECK_PARSE("AlwaysBreakAfterDefinitionReturnType: None",
+              AlwaysBreakAfterDefinitionReturnType, FormatStyle::DRTBS_None);
+  CHECK_PARSE("AlwaysBreakAfterDefinitionReturnType: All",
+              AlwaysBreakAfterDefinitionReturnType, FormatStyle::DRTBS_All);
+  CHECK_PARSE("AlwaysBreakAfterDefinitionReturnType: TopLevel",
+              AlwaysBreakAfterDefinitionReturnType,
+              FormatStyle::DRTBS_TopLevel);
+
   Style.NamespaceIndentation = FormatStyle::NI_All;
   CHECK_PARSE("NamespaceIndentation: None", NamespaceIndentation,
               FormatStyle::NI_None);
diff --git a/unittests/Format/FormatTestJS.cpp b/unittests/Format/FormatTestJS.cpp
index 15d62eb..e6c12f4 100644
--- a/unittests/Format/FormatTestJS.cpp
+++ b/unittests/Format/FormatTestJS.cpp
@@ -579,6 +579,7 @@ TEST_F(FormatTestJS, RegexLiteralClassification) {
   verifyFormat("var x = a && /abc/.test(y);");
   verifyFormat("var x = a || /abc/.test(y);");
   verifyFormat("var x = a + /abc/.search(y);");
+  verifyFormat("/abc/.search(y);");
   verifyFormat("var regexs = {/abc/, /abc/};");
   verifyFormat("return /abc/;");
 
@@ -624,10 +625,23 @@ TEST_F(FormatTestJS, RegexLiteralSpecialCharacters) {
   verifyFormat("var regex = /\a\\//g;");
   verifyFormat("var regex = /a\\//;\n"
                "var x = 0;");
+  EXPECT_EQ("var regex = /'/g;", format("var regex = /'/g ;"));
+  EXPECT_EQ("var regex = /'/g;  //'", format("var regex = /'/g ; //'"));
   EXPECT_EQ("var regex = /\\/*/;\n"
             "var x = 0;",
             format("var regex = /\\/*/;\n"
                    "var x=0;"));
+  EXPECT_EQ("var x = /a\\//;", format("var x = /a\\//  \n;"));
+  verifyFormat("var regex = /\"/;", getGoogleJSStyleWithColumns(16));
+  verifyFormat("var regex =\n"
+               "    /\"/;",
+               getGoogleJSStyleWithColumns(15));
+  verifyFormat("var regex =  //\n"
+               "    /a/;");
+  verifyFormat("var regexs = [\n"
+               "  /d/,   //\n"
+               "  /aa/,  //\n"
+               "];");
 }
 
 TEST_F(FormatTestJS, RegexLiteralModifiers) {
@@ -789,15 +803,11 @@ TEST_F(FormatTestJS, TemplateStrings) {
                    "     ${  name    }\n"
                    "  !`;"));
 
-  // FIXME: +1 / -1 offsets are to work around clang-format miscalculating
-  // widths for unknown tokens that are not whitespace (e.g. '`'). Remove when
-  // the code is corrected.
-
   verifyFormat("var x =\n"
                "    `hello ${world}` >= some();",
                getGoogleJSStyleWithColumns(34)); // Barely doesn't fit.
   verifyFormat("var x = `hello ${world}` >= some();",
-               getGoogleJSStyleWithColumns(35 + 1)); // Barely fits.
+               getGoogleJSStyleWithColumns(35)); // Barely fits.
   EXPECT_EQ("var x = `hello\n"
             "  ${world}` >=\n"
             "        some();",
@@ -812,10 +822,14 @@ TEST_F(FormatTestJS, TemplateStrings) {
                    "  ${world}` >= some();",
                    getGoogleJSStyleWithColumns(22))); // Barely fits.
 
-  verifyFormat("var x =\n    `h`;", getGoogleJSStyleWithColumns(13 - 1));
+  verifyFormat("var x =\n"
+               "    `h`;",
+               getGoogleJSStyleWithColumns(11));
   EXPECT_EQ(
       "var x =\n    `multi\n  line`;",
-      format("var x = `multi\n  line`;", getGoogleJSStyleWithColumns(14 - 1)));
+      format("var x = `multi\n  line`;", getGoogleJSStyleWithColumns(13)));
+  verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n"
+               "    `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);");
 
   // Make sure template strings get a proper ColumnWidth assigned, even if they
   // are first token in line.
@@ -869,6 +883,8 @@ TEST_F(FormatTestJS, TypeArguments) {
   verifyFormat("class C extends D<E> implements F<G>, H<I> {}");
   verifyFormat("function f(a: List<any> = null) {}");
   verifyFormat("function f(): List<any> {}");
+  verifyFormat("function aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa():\n"
+               "    bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb {}");
 }
 
 TEST_F(FormatTestJS, OptionalTypes) {
diff --git a/unittests/Format/FormatTestJava.cpp b/unittests/Format/FormatTestJava.cpp
index 4c161e0..8e59087 100644
--- a/unittests/Format/FormatTestJava.cpp
+++ b/unittests/Format/FormatTestJava.cpp
@@ -67,6 +67,8 @@ TEST_F(FormatTestJava, FormatsInstanceOfLikeOperators) {
   verifyFormat("return aaaaaaaaaaaaaaaaaaaaaaaaaaaaa instanceof\n"
                "    bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb;",
                Style);
+  verifyFormat("return aaaaaaaaaaaaaaaaaaa instanceof bbbbbbbbbbbbbbbbbbbbbbb\n"
+               "    && ccccccccccccccccccc instanceof dddddddddddddddddddddd;");
 }
 
 TEST_F(FormatTestJava, Chromium) {
diff --git a/unittests/Format/FormatTestProto.cpp b/unittests/Format/FormatTestProto.cpp
index ac8fcbd..74f7005 100644
--- a/unittests/Format/FormatTestProto.cpp
+++ b/unittests/Format/FormatTestProto.cpp
@@ -63,6 +63,10 @@ TEST_F(FormatTestProto, FormatsMessages) {
                "}");
 }
 
+TEST_F(FormatTestProto, KeywordsInOtherLanguages) {
+  verifyFormat("optional string operator = 1;");
+}
+
 TEST_F(FormatTestProto, FormatsEnums) {
   verifyFormat("enum Type {\n"
                "  UNKNOWN = 0;\n"
diff --git a/utils/analyzer/CmpRuns.py b/utils/analyzer/CmpRuns.py
index 30157be..a1f7a56 100755
--- a/utils/analyzer/CmpRuns.py
+++ b/utils/analyzer/CmpRuns.py
@@ -300,7 +300,7 @@ def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
         print >>auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports
         print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs
         
-    return foundDiffs    
+    return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
 
 def main():
     from optparse import OptionParser
diff --git a/utils/analyzer/SATestBuild.py b/utils/analyzer/SATestBuild.py
index 0a2c836..20839c2 100644
--- a/utils/analyzer/SATestBuild.py
+++ b/utils/analyzer/SATestBuild.py
@@ -46,6 +46,7 @@ import math
 import shutil
 import time
 import plistlib
+import argparse
 from subprocess import check_call, CalledProcessError
 
 #------------------------------------------------------------------------------
@@ -216,6 +217,8 @@ def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
         SBPrefix = "scan-build " + SBOptions + " "
         for Command in SBCommandFile:
             Command = Command.strip()
+            if len(Command) == 0:
+                continue;
             # If using 'make', auto imply a -jX argument
             # to speed up analysis.  xcodebuild will
             # automatically use the maximum number of cores.
@@ -277,7 +280,7 @@ def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
         
         # Build and call the analyzer command.
         OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
-        Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
+        Command = CmdPrefix + OutputOption + FileName
         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
         try:
             if Verbose == 1:        
@@ -404,7 +407,11 @@ class Discarder(object):
         pass # do nothing
 
 # Compare the warnings produced by scan-build.
-def runCmpResults(Dir):   
+# Strictness defines the success criteria for the test:
+#   0 - success if there are no crashes or analyzer failure.
+#   1 - success if there are no difference in the number of reported bugs.
+#   2 - success if all the bug reports are identical.
+def runCmpResults(Dir, Strictness = 0):   
     TBegin = time.time() 
 
     RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
@@ -448,11 +455,18 @@ def runCmpResults(Dir):
         OLD_STDOUT = sys.stdout
         sys.stdout = Discarder()
         # Scan the results, delete empty plist files.
-        NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
+        NumDiffs, ReportsInRef, ReportsInNew = \
+            CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
         sys.stdout = OLD_STDOUT
         if (NumDiffs > 0) :
             print "Warning: %r differences in diagnostics. See %s" % \
                   (NumDiffs, DiffsPath,)
+        if Strictness >= 2 and NumDiffs > 0:
+            print "Error: Diffs found in strict mode (2)."
+            sys.exit(-1)       
+        elif Strictness >= 1 and ReportsInRef != ReportsInNew:
+            print "Error: The number of results are different in strict mode (1)."
+            sys.exit(-1)       
                     
     print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin) 
     return (NumDiffs > 0)
@@ -486,7 +500,7 @@ def updateSVN(Mode, ProjectsMap):
         print "Error: SVN update failed."
         sys.exit(-1)
         
-def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
+def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None, Strictness = 0):
     print " \n\n--- Building project %s" % (ID,)
 
     TBegin = time.time() 
@@ -505,12 +519,12 @@ def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
     checkBuild(SBOutputDir)
     
     if IsReferenceBuild == False:
-        runCmpResults(Dir)
+        runCmpResults(Dir, Strictness)
         
     print "Completed tests for project %s (time: %.2f)." % \
           (ID, (time.time()-TBegin))
     
-def testAll(IsReferenceBuild = False, UpdateSVN = False):
+def testAll(IsReferenceBuild = False, UpdateSVN = False, Strictness = 0):
     PMapFile = open(getProjectMapPath(), "rb")
     try:        
         # Validate the input.
@@ -532,7 +546,7 @@ def testAll(IsReferenceBuild = False, UpdateSVN = False):
         # Test the projects.
         PMapFile.seek(0)    
         for I in csv.reader(PMapFile):
-            testProject(I[0], int(I[1]), IsReferenceBuild)
+            testProject(I[0], int(I[1]), IsReferenceBuild, None, Strictness)
 
         # Add reference results to SVN.
         if UpdateSVN == True:
@@ -545,18 +559,25 @@ def testAll(IsReferenceBuild = False, UpdateSVN = False):
         PMapFile.close()    
             
 if __name__ == '__main__':
+    # Parse command line arguments.
+    Parser = argparse.ArgumentParser(description='Test the Clang Static Analyzer.')
+    Parser.add_argument('--strictness', dest='strictness', type=int, default=0,
+                       help='0 to fail on runtime errors, 1 to fail when the number\
+                             of found bugs are different from the reference, 2 to \
+                             fail on any difference from the reference. Default is 0.')
+    Parser.add_argument('-r', dest='regenerate', action='store_true', default=False,
+                        help='Regenerate reference output.')
+    Parser.add_argument('-rs', dest='update_reference', action='store_true',
+                        default=False, help='Regenerate reference output and update svn.')
+    Args = Parser.parse_args()
+
     IsReference = False
     UpdateSVN = False
-    if len(sys.argv) >= 2:
-        if sys.argv[1] == "-r":
-            IsReference = True
-        elif sys.argv[1] == "-rs":
-            IsReference = True
-            UpdateSVN = True
-        else:     
-          print >> sys.stderr, 'Usage: ', sys.argv[0],\
-                             '[-r|-rs]' \
-                             'Use -r to regenerate reference output' \
-                             'Use -rs to regenerate reference output and update svn'
-
-    testAll(IsReference, UpdateSVN)
+    Strictness = Args.strictness
+    if Args.regenerate:
+        IsReference = True
+    elif Args.update_reference:
+        IsReference = True
+        UpdateSVN = True
+
+    testAll(IsReference, UpdateSVN, Strictness)
-- 
cgit v1.1